@@ -50,119 +50,6 @@ class PyQnnManager {
5050 qnn_executorch_options, qnn_executorch_context_binary_);
5151 }
5252
53- // used during stage 2 of multi-graph mode
54- explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
55- : qnn_executorch_option_ptr_(buffer) {
56- auto qnn_executorch_options = GetQnnExecuTorchOptions (
57- qnn_executorch_option_ptr_.cast <std::string_view>().data ());
58-
59- // merge multiple qcirs into one context with multiple graphs
60-
61- // We start retrieving tensor from offsets = 0.
62- std::vector<uint32_t > offsets (1 , 0 );
63- std::vector<uint8_t > tensor_data;
64- std::vector<uint8_t *> tensor_ptr;
65- std::vector<uint64_t > tensor_size;
66- uint64_t total_tensor_size = 0 ;
67- for (size_t i = 0 ; i < qcirs.size (); ++i) {
68- py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
69-
70- uint8_t * qcir_custom_buffer_ptr = static_cast <uint8_t *>(info.ptr );
71- QnnQcirCustomProtocol qnn_qcir_custom_protocol;
72- auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
73- qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer (
74- qcir_custom_buffer_ptr);
75-
76- if (status != Error::Ok) {
77- QNN_EXECUTORCH_LOG_ERROR (" Fail to verify QnnQcirCustomProtocol" );
78- return ;
79- }
80-
81- tensor_ptr.push_back (static_cast <uint8_t *>(qcir_tensor_ptr));
82- tensor_size.push_back (qcir_tensor_size);
83- total_tensor_size += qcir_tensor_size;
84- offsets.push_back (offsets.back () + qcir_tensor_size);
85- }
86-
87- tensor_data.resize (total_tensor_size);
88-
89- // store multiple graphs tensor in a contiguous memory space
90- for (size_t i = 0 ; i < tensor_ptr.size (); ++i) {
91- std::memcpy (
92- tensor_data.data () + offsets[i], tensor_ptr[i], tensor_size[i]);
93- }
94-
95- std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
96- for (size_t i = 0 ; i < qcirs.size (); ++i) {
97- py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
98-
99- uint8_t * qcir_custom_buffer_ptr = static_cast <uint8_t *>(info.ptr );
100- QnnQcirCustomProtocol qnn_qcir_custom_protocol;
101- auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
102- qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer (
103- qcir_custom_buffer_ptr);
104-
105- if (status != Error::Ok) {
106- QNN_EXECUTORCH_LOG_ERROR (" Fail to verify QnnQcirCustomProtocol" );
107- return ;
108- }
109-
110- auto context = qcir::GetContext (qcir_fbs_ptr);
111- for (const auto & graph : *context->graphs ()) {
112- std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
113- for (const auto tensor : *graph->tensors ()) {
114- // here we need to take a detour to merge multiple qcir flatbuffers
115- // outer ToTensor
116- // return: flatbuffers::Offset<Tensor>
117- // consume: QnnTensor, data_offset, flatbuffers::FlatBufferBuilder*
118- // inner ToTensor
119- // return: QnnTensor
120- // consume:
121- // flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>,
122- // data_ptr
123- tensors.emplace_back (ToTensor (
124- ToTensor (tensor, nullptr ),
125- offsets[i] + tensor->offset (),
126- &builder_));
127- }
128- std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
129- for (const auto & node : *graph->nodes ()) {
130- uint32_t * inputs_ptr = const_cast <uint32_t *>(node->inputs ()->data ());
131- uint32_t * outputs_ptr =
132- const_cast <uint32_t *>(node->outputs ()->data ());
133- uint32_t * params_ptr = const_cast <uint32_t *>(node->params ()->data ());
134- std::vector<uint32_t > inputs (
135- inputs_ptr, inputs_ptr + node->inputs ()->size ());
136- std::vector<uint32_t > outputs (
137- outputs_ptr, outputs_ptr + node->outputs ()->size ());
138- std::vector<uint32_t > params (
139- params_ptr, params_ptr + node->params ()->size ());
140- nodes.emplace_back (qcir::CreateOperatorDirect (
141- builder_,
142- node->name ()->str ().c_str (),
143- node->package_name ()->str ().c_str (),
144- node->type_name ()->str ().c_str (),
145- &inputs,
146- &outputs,
147- &params));
148- }
149- graphs.emplace_back (qcir::CreateGraphDirect (
150- builder_, graph->name ()->str ().c_str (), &nodes, &tensors));
151- }
152- }
153-
154- auto context = qcir::CreateContextDirect (builder_, &graphs);
155- builder_.Finish (context);
156- QnnExecuTorchContextBinary qcir_bin (
157- {builder_.GetBufferPointer (), builder_.GetSize ()});
158-
159- // Init QnnQcirCustomProtocol binary
160- qnn_executorch_context_binary_ =
161- MakeQcirCustomBinaryInfo (qcir_bin, tensor_data);
162- qnn_manager_ = std::make_shared<QnnManager>(
163- qnn_executorch_options, qnn_executorch_context_binary_);
164- }
165-
16653 executorch::runtime::Error Init () {
16754 return qnn_manager_->Init ();
16855 }
@@ -172,121 +59,108 @@ class PyQnnManager {
17259 return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
17360 }
17461
175- // this method is specific for stage 2 of compiling multi-graphs
176- py::array_t <char > Compile () {
177- if (qnn_manager_->CompileQcir () != Error::Ok) {
178- QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
179- return py::array_t <char >(0 );
180- }
181-
182- // generate context binary if compilation succeded
183- QnnExecuTorchContextBinary binary_info;
184- qnn_manager_->GetContextBinary (binary_info);
185- // allocate py::array (to pass the result of the C++ function to Python)
186- auto result = py::array_t <char >(binary_info.nbytes );
187- auto result_buffer = result.request ();
188- char * result_ptr = (char *)result_buffer.ptr ;
189- std::memcpy (result_ptr, binary_info.buffer , binary_info.nbytes );
190- return result;
191- }
192-
19362 py::array_t <char > Compile (
194- const std::string& graph_name ,
195- std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
63+ const std::vector<std:: string>& graph_names ,
64+ std::vector<std::vector<std:: shared_ptr<OpWrapper> >>& op_wrappers) {
19665 QnnExecuTorchContextBinary binary_info;
19766
198- if (qnn_manager_->IsOnlinePrepare () || qnn_manager_-> IsMultipleGraphs () ) {
67+ if (qnn_manager_->IsOnlinePrepare ()) {
19968 builder_.Reset ();
69+ std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs;
20070 std::vector<uint8_t > tensor_data;
20171 std::vector<uint64_t > offsets;
20272 std::unordered_map<void *, int > tensor_map;
203- std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
204- std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
205-
206- auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
207- std::vector<uint32_t >& index) {
208- auto it = tensor_map.find (wrapper.get ());
209- if (it != tensor_map.end ()) {
210- index.push_back (it->second );
211- } else {
212- tensor_map[wrapper.get ()] = fb_tensors.size ();
213- index.push_back (fb_tensors.size ());
214- offsets.push_back (tensor_data.size ());
215- Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct ();
216- fb_tensors.emplace_back (
217- ToTensor (qnn_tensor, offsets.back (), &builder_));
218- uint8_t * data_ptr = static_cast <uint8_t *>(
219- QNN_TENSOR_VER_PTR (qnn_tensor)->clientBuf .data );
220- if (data_ptr != nullptr ) {
221- tensor_data.insert (
222- tensor_data.end (),
223- data_ptr,
224- data_ptr + QNN_TENSOR_VER_PTR (qnn_tensor)->clientBuf .dataSize );
73+
74+ for (int i = 0 ; i < graph_names.size (); ++i) {
75+ std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
76+ std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
77+
78+ auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
79+ std::vector<uint32_t >& index) {
80+ auto it = tensor_map.find (wrapper.get ());
81+ if (it != tensor_map.end ()) {
82+ index.push_back (it->second );
83+ } else {
84+ tensor_map[wrapper.get ()] = fb_tensors.size ();
85+ index.push_back (fb_tensors.size ());
86+ offsets.push_back (tensor_data.size ());
87+ Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct ();
88+ fb_tensors.emplace_back (
89+ ToTensor (qnn_tensor, offsets.back (), &builder_));
90+ uint8_t * data_ptr = static_cast <uint8_t *>(
91+ QNN_TENSOR_VER_PTR (qnn_tensor)->clientBuf .data );
92+ if (data_ptr != nullptr ) {
93+ tensor_data.insert (
94+ tensor_data.end (),
95+ data_ptr,
96+ data_ptr +
97+ QNN_TENSOR_VER_PTR (qnn_tensor)->clientBuf .dataSize );
98+ }
22599 }
226- }
227- };
100+ };
228101
229- for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
230- std::vector<uint32_t > inputs, outputs, params;
102+ for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers[i] ) {
103+ std::vector<uint32_t > inputs, outputs, params;
231104
232- for (const auto & tensor_wrapper : op_wrapper->GetInputTensors ()) {
233- set_tensor (tensor_wrapper, inputs);
234- }
105+ for (const auto & tensor_wrapper : op_wrapper->GetInputTensors ()) {
106+ set_tensor (tensor_wrapper, inputs);
107+ }
235108
236- for (const auto & tensor_wrapper : op_wrapper->GetOutputTensors ()) {
237- set_tensor (tensor_wrapper, outputs);
238- }
109+ for (const auto & tensor_wrapper : op_wrapper->GetOutputTensors ()) {
110+ set_tensor (tensor_wrapper, outputs);
111+ }
239112
240- for (const auto & param : op_wrapper->GetParams ()) {
241- auto * p_tensor_param = dynamic_cast <TensorParamWrapper*>(param.get ());
242- if (p_tensor_param != nullptr ) {
243- auto wrapper = p_tensor_param->GetTensorWrapper ();
244- wrapper->SetName (param->GetName ());
245- set_tensor (wrapper, params);
246- } else {
247- executorch::runtime::Error err = param->PopulateQnnParam ();
248- if (err != executorch::runtime::Error::Ok) {
249- QNN_EXECUTORCH_LOG_ERROR (
250- " Fail to get scalar parameter in online prepare stage" );
251- return py::array_t <char >(0 );
113+ for (const auto & param : op_wrapper->GetParams ()) {
114+ auto * p_tensor_param =
115+ dynamic_cast <TensorParamWrapper*>(param.get ());
116+ if (p_tensor_param != nullptr ) {
117+ auto wrapper = p_tensor_param->GetTensorWrapper ();
118+ wrapper->SetName (param->GetName ());
119+ set_tensor (wrapper, params);
120+ } else {
121+ executorch::runtime::Error err = param->PopulateQnnParam ();
122+ if (err != executorch::runtime::Error::Ok) {
123+ QNN_EXECUTORCH_LOG_ERROR (
124+ " Fail to get scalar parameter in online prepare stage" );
125+ return py::array_t <char >(0 );
126+ }
127+ Qnn_Param_t p = param->GetQnnParam ();
128+ Qnn_Tensor_t t (
129+ {.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
130+ QNN_TENSOR_VER_PTR (t)->name = p.name ;
131+ QNN_TENSOR_VER_PTR (t)->dataType = p.scalarParam .dataType ;
132+ QNN_TENSOR_VER_PTR (t)->clientBuf .data =
133+ static_cast <void *>(&p.scalarParam .uint8Value );
134+ QNN_TENSOR_VER_PTR (t)->clientBuf .dataSize =
135+ GetDataTypeSize (QNN_TENSOR_VER_PTR (t)->dataType );
136+
137+ // collect tensor data
138+ offsets.push_back (tensor_data.size ());
139+ const uint8_t * data_ptr =
140+ static_cast <uint8_t *>(QNN_TENSOR_VER_PTR (t)->clientBuf .data );
141+ tensor_data.insert (
142+ tensor_data.end (),
143+ data_ptr,
144+ data_ptr + QNN_TENSOR_VER_PTR (t)->clientBuf .dataSize );
145+ params.push_back (fb_tensors.size ());
146+ fb_tensors.emplace_back (ToTensor (t, offsets.back (), &builder_));
252147 }
253- Qnn_Param_t p = param->GetQnnParam ();
254- Qnn_Tensor_t t (
255- {.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
256- QNN_TENSOR_VER_PTR (t)->name = p.name ;
257- QNN_TENSOR_VER_PTR (t)->dataType = p.scalarParam .dataType ;
258- QNN_TENSOR_VER_PTR (t)->clientBuf .data =
259- static_cast <void *>(&p.scalarParam .uint8Value );
260- QNN_TENSOR_VER_PTR (t)->clientBuf .dataSize =
261- GetDataTypeSize (QNN_TENSOR_VER_PTR (t)->dataType );
262-
263- // collect tensor data
264- offsets.push_back (tensor_data.size ());
265- const uint8_t * data_ptr =
266- static_cast <uint8_t *>(QNN_TENSOR_VER_PTR (t)->clientBuf .data );
267- tensor_data.insert (
268- tensor_data.end (),
269- data_ptr,
270- data_ptr + QNN_TENSOR_VER_PTR (t)->clientBuf .dataSize );
271- params.push_back (fb_tensors.size ());
272- fb_tensors.emplace_back (ToTensor (t, offsets.back (), &builder_));
273148 }
274- }
275149
276- Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig ();
277- fb_ops.emplace_back (qcir::CreateOperatorDirect (
278- builder_,
279- QNN_OP_VER_PTR (op_config)->name ,
280- QNN_OP_VER_PTR (op_config)->packageName ,
281- QNN_OP_VER_PTR (op_config)->typeName ,
282- &inputs,
283- &outputs,
284- &params));
150+ Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig ();
151+ fb_ops.emplace_back (qcir::CreateOperatorDirect (
152+ builder_,
153+ QNN_OP_VER_PTR (op_config)->name ,
154+ QNN_OP_VER_PTR (op_config)->packageName ,
155+ QNN_OP_VER_PTR (op_config)->typeName ,
156+ &inputs,
157+ &outputs,
158+ ¶ms));
159+ }
160+ fb_graphs.emplace_back (qcir::CreateGraphDirect (
161+ builder_, graph_names[i].c_str (), &fb_ops, &fb_tensors));
285162 }
286163
287- std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs (
288- {qcir::CreateGraphDirect (
289- builder_, graph_name.c_str (), &fb_ops, &fb_tensors)});
290164 auto context = qcir::CreateContextDirect (builder_, &fb_graphs);
291165 builder_.Finish (context);
292166
@@ -300,10 +174,12 @@ class PyQnnManager {
300174 std::tie (binary_info.buffer , binary_info.nbytes ) =
301175 custom_qcir_protocol_buffer_.GetCustomProtocolBuffer ();
302176 } else {
303- if (qnn_manager_->Compile (graph_name, op_wrappers) !=
304- executorch::runtime::Error::Ok) {
305- QNN_EXECUTORCH_LOG_ERROR (" Fail to compile QNN graph" );
306- return py::array_t <char >(0 );
177+ for (int i = 0 ; i < graph_names.size (); ++i) {
178+ if (qnn_manager_->Compile (graph_names[i], op_wrappers[i]) !=
179+ executorch::runtime::Error::Ok) {
180+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile QNN graph" );
181+ return py::array_t <char >(0 );
182+ }
307183 }
308184 if (qnn_manager_->GetContextBinary (binary_info) !=
309185 executorch::runtime::Error::Ok) {
0 commit comments