@@ -35,8 +35,25 @@ struct ggml_backend_dnnl_buffer_context
     std::vector<dnnl::memory> sub_mems;
 };
 
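+// Resolve the host pointer for a tensor placed in a DNNL buffer: t->extra
+// holds the dnnl_memory_t of the tensor's sub-memory, and t->data encodes the
+// byte offset relative to DNNL_BUFFER_BASE (checked by the assert below).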
+static void * get_memory_handle(const struct ggml_tensor * t) {
+    auto buf_mem = dnnl::memory{(dnnl_memory_t)t->extra, true};
+    auto buf_md = buf_mem.get_desc();
+    auto buf_handle = buf_mem.get_data_handle();
+    auto buf_offset = buf_md.get_submemory_offset();
+    GGML_ASSERT((size_t)buf_offset == ((uintptr_t)t->data - DNNL_BUFFER_BASE));
+    // auto buf_ctx = (ggml_backend_dnnl_buffer_context*)t->buffer->context;
+    // auto parent_buf_handle = buf_ctx->mem.get_data_handle();
+
+    // FIXME: buf_handle + offset works for CPU only
+    return (char *)buf_handle + buf_offset;
+}
+#else
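+// Without DNNL-managed buffers the tensor data is already a plain host pointer.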
+static void * get_memory_handle(const struct ggml_tensor * t) {
+    return t->data;
+}
 #endif
 
+
 namespace {
 template <class T>
 struct dnnl_mem_ptr {
@@ -91,15 +108,6 @@ static bool ggml_dnnl_type_supported(enum ggml_type type) {
 
 static bool ggml_dnnl_tensor_supported(const struct ggml_tensor * t) {
     auto type = t->type;
-    GGML_TENSOR_LOCALS(int64_t, ne, t, ne)
-    GGML_TENSOR_LOCALS(size_t, nb, t, nb)
-
-
-    // cannot be transposed or permuted
-    // GGML_ASSERT(nb0 == ggml_type_size(type));
-    // GGML_ASSERT(nb0 <= nb1);
-    // GGML_ASSERT(nb1 <= nb2);
-    // GGML_ASSERT(nb2 <= nb3);
 
     if (!ggml_dnnl_type_supported(type)) {
         return false;
@@ -166,25 +174,6 @@ dnnl::memory::desc ggml_tensor_to_dnnl_md(const struct ggml_tensor * t, bool tra
     return dnnl::memory::desc{adims, dt, strides};
 }
 
-#if USE_DNNL_BUFFER
-static void * get_memory_handle(const struct ggml_tensor * t) {
-    auto buf_mem = dnnl::memory{(dnnl_memory_t)t->extra, true};
-    auto buf_md = buf_mem.get_desc();
-    auto buf_handle = buf_mem.get_data_handle();
-    auto buf_offset = buf_md.get_submemory_offset();
-    GGML_ASSERT((size_t)buf_offset == ((uintptr_t)t->data - DNNL_BUFFER_BASE));
-    // auto buf_ctx = (ggml_backend_dnnl_buffer_context*)t->buffer->context;
-    // auto parent_buf_handle = buf_ctx->mem.get_data_handle();
-
-    // FIXME: buf_handle + offset works for CPU only
-    return (char *)buf_handle + buf_offset;
-}
-#else
-static void * get_memory_handle(const struct ggml_tensor * t) {
-    return t->data;
-}
-#endif
-
 dnnl::memory ggml_tensor_to_dnnl_mem(ggml_backend_t backend, const struct ggml_tensor * t, bool transpose = false,
                                      dnnl::memory::data_type convert_to = dnnl::memory::data_type::undef,
                                      size_t ndims = GGML_MAX_DIMS) {
@@ -474,7 +463,7 @@ static ggml_status ggml_backend_dnnl_softmax(ggml_backend_t backend, struct ggml
             {DNNL_ARG_DST, src_mem},
         });
     }
-    // float alpha = *reinterpret_cast<float*>(dst->op_params);
+
     const int axis = src_mem.get_desc().get_dims().size() - 1;
     auto pd = dnnl::softmax_forward::primitive_desc{ctx->engine, dnnl::prop_kind::forward_inference, dnnl::algorithm::softmax_accurate, src_mem.get_desc(), dst_mem.get_desc(), axis};
     auto prim = dnnl::softmax_forward{pd};
@@ -493,7 +482,6 @@ static ggml_status ggml_backend_dnnl_norm(ggml_backend_t backend, struct ggml_te
     auto src_mem = ggml_tensor_to_dnnl_mem(backend, src);
     auto dst_mem = ggml_tensor_to_dnnl_mem(backend, dst);
 
-    // float alpha = *reinterpret_cast<float*>(dst->op_params);
     float eps = ((const float *)(dst->op_params))[0];
 
     GGML_ASSERT(eps > 0.0f);
@@ -805,6 +793,62 @@ static ggml_status ggml_backend_dnnl_node_compute(ggml_backend_t backend, struct
 */
 }
 
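+// Single predicate for op support; ggml_backend_dnnl_supports_op below
+// delegates here so graph scheduling and compute stay in agreement.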
+static bool ggml_backend_dnnl_node_supported(ggml_backend_t backend, const struct ggml_tensor * node) {
+    GGML_UNUSED(backend);
+    // return false;
+    switch (node->op) {
+    case GGML_OP_NONE:
+        return true;
+    case GGML_OP_RESHAPE:
+    case GGML_OP_VIEW:
+    case GGML_OP_PERMUTE:
+    case GGML_OP_TRANSPOSE:
+    case GGML_OP_ADD:
+    case GGML_OP_ADD1:
+    case GGML_OP_SUB:
+    case GGML_OP_MUL:
+    case GGML_OP_DIV:
+    case GGML_OP_SQR:
+    case GGML_OP_SQRT:
+    case GGML_OP_LOG:
+    case GGML_OP_CONT:
+    case GGML_OP_CPY:
+    case GGML_OP_DUP:
+    case GGML_OP_SCALE:
+    case GGML_OP_DIAG_MASK_ZERO:
+    case GGML_OP_DIAG_MASK_INF:
+    case GGML_OP_SOFT_MAX:
+    case GGML_OP_GET_ROWS:
+        return ggml_dnnl_tensor_supported(node) && ggml_dnnl_tensor_supported(node->src[0]);
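+    // mul_mat eligibility is decided by a dedicated check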
+    case GGML_OP_MUL_MAT:
+        return ggml_compute_forward_mul_mat_use_dnnl(node);
+    case GGML_OP_UNARY:
+        {
+            enum ggml_unary_op uop = ggml_get_unary_op(node);
+            switch (uop) {
+            case GGML_UNARY_OP_ABS:
+            case GGML_UNARY_OP_TANH:
+            case GGML_UNARY_OP_ELU:
+            case GGML_UNARY_OP_RELU:
+            case GGML_UNARY_OP_GELU:
+            case GGML_UNARY_OP_GELU_QUICK:
+            case GGML_UNARY_OP_HARDSWISH:
+            case GGML_UNARY_OP_HARDSIGMOID:
+                return ggml_dnnl_tensor_supported(node) && ggml_dnnl_tensor_supported(node->src[0]);
+            default:
+                // GGML_UNARY_OP_SGN,
+                // GGML_UNARY_OP_NEG,
+                // GGML_UNARY_OP_STEP,
+                // GGML_UNARY_OP_SILU,
+                return false;
+            }
+        }
+
+    default:
+        return false;
+    }
+}
+
 // buffer interface
 
 #if USE_DNNL_BUFFER
@@ -949,23 +993,6 @@ GGML_CALL static const char * ggml_backend_dnnl_buffer_type_get_name(ggml_backen
     GGML_UNUSED(buft);
 }
 
-// GGML_CALL static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-//     size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
-//     void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h)
-//     if (data == NULL) {
-//         fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
-//         return NULL;
-//     }
-
-//     return ggml_backend_buffer_init(buft, cpu_backend_buffer_i, data, size);
-// }
-
-// GGML_CALL static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
-//     return TENSOR_ALIGNMENT;
-
-//     GGML_UNUSED(buft);
-// }
-
 GGML_CALL static bool ggml_backend_dnnl_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
     return ggml_backend_is_dnnl(backend) || ggml_backend_is_cpu(backend);
 
@@ -1037,67 +1064,7 @@ GGML_CALL static ggml_status ggml_backend_dnnl_graph_compute(ggml_backend_t back
 }
 
 GGML_CALL static bool ggml_backend_dnnl_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    GGML_UNUSED(backend);
-    // return false;
-    switch (op->op) {
-    case GGML_OP_NONE:
-        return true;
-    case GGML_OP_RESHAPE:
-    case GGML_OP_VIEW:
-    case GGML_OP_PERMUTE:
-    case GGML_OP_TRANSPOSE:
-    case GGML_OP_ADD:
-    case GGML_OP_ADD1:
-    case GGML_OP_SUB:
-    case GGML_OP_MUL:
-    case GGML_OP_DIV:
-    case GGML_OP_SQR:
-    case GGML_OP_SQRT:
-    case GGML_OP_LOG:
-    case GGML_OP_CONT:
-    case GGML_OP_CPY:
-    case GGML_OP_DUP:
-    case GGML_OP_SCALE:
-    case GGML_OP_DIAG_MASK_ZERO:
-    case GGML_OP_DIAG_MASK_INF:
-    case GGML_OP_SOFT_MAX:
-    case GGML_OP_GET_ROWS:
-        return ggml_dnnl_tensor_supported(op) && ggml_dnnl_tensor_supported(op->src[0]);
-    case GGML_OP_MUL_MAT:
-        return ggml_compute_forward_mul_mat_use_dnnl(op);
-    case GGML_OP_UNARY:
-        {
-            enum ggml_unary_op uop = ggml_get_unary_op(op);
-            switch (uop) {
-            case GGML_UNARY_OP_ABS:
-            case GGML_UNARY_OP_TANH:
-            case GGML_UNARY_OP_ELU:
-            case GGML_UNARY_OP_RELU:
-            case GGML_UNARY_OP_GELU:
-            case GGML_UNARY_OP_GELU_QUICK:
-            case GGML_UNARY_OP_HARDSWISH:
-            case GGML_UNARY_OP_HARDSIGMOID:
-                return ggml_dnnl_tensor_supported(op) && ggml_dnnl_tensor_supported(op->src[0]);
-            default:
-                // GGML_UNARY_OP_SGN,
-                // GGML_UNARY_OP_NEG,
-                // GGML_UNARY_OP_STEP,
-                // GGML_UNARY_OP_SILU,
-                return false;
-            }
-        }
-
-    default:
-        return false;
-    }
-}
-
-GGML_CALL static bool ggml_backend_dnnl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
-    // const int min_batch_size = 32;
-
-    return ggml_backend_dnnl_supports_op(backend, op);
-
-    // GGML_UNUSED(backend);
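+    // the per-op checks now live in ggml_backend_dnnl_node_supported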
+    return ggml_backend_dnnl_node_supported(backend, op);
 }
 
 static struct ggml_backend_i dnnl_backend_i = {
@@ -1113,7 +1080,7 @@ static struct ggml_backend_i dnnl_backend_i = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute      = */ ggml_backend_dnnl_graph_compute,
     /* .supports_op        = */ ggml_backend_dnnl_supports_op,
-    /* .offload_op         = */ NULL, // ggml_backend_dnnl_offload_op,
+    /* .offload_op         = */ NULL,
     /* .event_new          = */ NULL,
     /* .event_free         = */ NULL,
     /* .event_record       = */ NULL,