
Commit f341379

Qualcomm AI Engine Direct - multi-method support
Summary:
- refactor to adopt the multi-method change
- framework change to meet the use case
1 parent 9a18669 commit f341379

File tree

18 files changed: +340 -660 lines changed


backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 3 additions & 4 deletions
@@ -30,15 +30,14 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
   py::class_<PyQnnManager, std::shared_ptr<PyQnnManager>>(m, "QnnManager")
       .def(py::init<const py::bytes&>())
       .def(py::init<const py::bytes&, const py::bytes&>())
-      .def(py::init<const py::bytes&, const py::list&>())
       .def("Init", &PyQnnManager::Init)
       .def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
-      .def("Compile", py::overload_cast<>(&PyQnnManager::Compile))
       .def(
           "Compile",
           py::overload_cast<
-              const std::string&,
-              std::vector<std::shared_ptr<OpWrapper>>&>(&PyQnnManager::Compile))
+              const std::vector<std::string>&,
+              std::vector<std::vector<std::shared_ptr<OpWrapper>>>&>(
+              &PyQnnManager::Compile))
       .def("Destroy", &PyQnnManager::Destroy)
       .def("IsAvailable", &PyQnnManager::IsAvailable)
       .def("IsTensorDump", &PyQnnManager::IsTensorDump)

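The binding change above is easiest to read from the Python side: Compile now takes a list of graph names plus a parallel list of per-graph OpWrapper lists and returns one context binary, while the old single-graph overload, the qcir-list constructor, and the argument-less Compile() are removed. A minimal usage sketch, assuming the usual module import path and hypothetical option_bytes / per-method op-wrapper variables (none of these names come from this diff):

    from executorch.backends.qualcomm.python import PyQnnManagerAdaptor

    # option_bytes: serialized QnnExecuTorchOptions (assumed prepared elsewhere);
    # forward_ops / backward_ops: lists of OpWrapper objects, one list per method.
    manager = PyQnnManagerAdaptor.QnnManager(option_bytes)
    manager.Init()
    context_binary = manager.Compile(
        ["forward", "backward"],      # graph_names, one entry per method/graph
        [forward_ops, backward_ops],  # op_wrappers, parallel to graph_names
    )
    manager.Destroy()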
backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 91 additions & 215 deletions
@@ -50,119 +50,6 @@ class PyQnnManager {
         qnn_executorch_options, qnn_executorch_context_binary_);
   }

-  // used during stage 2 of multi-graph mode
-  explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
-      : qnn_executorch_option_ptr_(buffer) {
-    auto qnn_executorch_options = GetQnnExecuTorchOptions(
-        qnn_executorch_option_ptr_.cast<std::string_view>().data());
-
-    // merge multiple qcirs into one context with multiple graphs
-
-    // We start retrieving tensor from offsets = 0.
-    std::vector<uint32_t> offsets(1, 0);
-    std::vector<uint8_t> tensor_data;
-    std::vector<uint8_t*> tensor_ptr;
-    std::vector<uint64_t> tensor_size;
-    uint64_t total_tensor_size = 0;
-    for (size_t i = 0; i < qcirs.size(); ++i) {
-      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-
-      uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
-      QnnQcirCustomProtocol qnn_qcir_custom_protocol;
-      auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
-          qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
-              qcir_custom_buffer_ptr);
-
-      if (status != Error::Ok) {
-        QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
-        return;
-      }
-
-      tensor_ptr.push_back(static_cast<uint8_t*>(qcir_tensor_ptr));
-      tensor_size.push_back(qcir_tensor_size);
-      total_tensor_size += qcir_tensor_size;
-      offsets.push_back(offsets.back() + qcir_tensor_size);
-    }
-
-    tensor_data.resize(total_tensor_size);
-
-    // store multiple graphs tensor in a contiguous memory space
-    for (size_t i = 0; i < tensor_ptr.size(); ++i) {
-      std::memcpy(
-          tensor_data.data() + offsets[i], tensor_ptr[i], tensor_size[i]);
-    }
-
-    std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
-    for (size_t i = 0; i < qcirs.size(); ++i) {
-      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-
-      uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
-      QnnQcirCustomProtocol qnn_qcir_custom_protocol;
-      auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
-          qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
-              qcir_custom_buffer_ptr);
-
-      if (status != Error::Ok) {
-        QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
-        return;
-      }
-
-      auto context = qcir::GetContext(qcir_fbs_ptr);
-      for (const auto& graph : *context->graphs()) {
-        std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
-        for (const auto tensor : *graph->tensors()) {
-          // here we need to take a detour to merge multiple qcir flatbuffers
-          // outer ToTensor
-          //   return: flatbuffers::Offset<Tensor>
-          //   consume: QnnTensor, data_offset, flatbuffers::FlatBufferBuilder*
-          // inner ToTensor
-          //   return: QnnTensor
-          //   consume:
-          //     flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>,
-          //     data_ptr
-          tensors.emplace_back(ToTensor(
-              ToTensor(tensor, nullptr),
-              offsets[i] + tensor->offset(),
-              &builder_));
-        }
-        std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
-        for (const auto& node : *graph->nodes()) {
-          uint32_t* inputs_ptr = const_cast<uint32_t*>(node->inputs()->data());
-          uint32_t* outputs_ptr =
-              const_cast<uint32_t*>(node->outputs()->data());
-          uint32_t* params_ptr = const_cast<uint32_t*>(node->params()->data());
-          std::vector<uint32_t> inputs(
-              inputs_ptr, inputs_ptr + node->inputs()->size());
-          std::vector<uint32_t> outputs(
-              outputs_ptr, outputs_ptr + node->outputs()->size());
-          std::vector<uint32_t> params(
-              params_ptr, params_ptr + node->params()->size());
-          nodes.emplace_back(qcir::CreateOperatorDirect(
-              builder_,
-              node->name()->str().c_str(),
-              node->package_name()->str().c_str(),
-              node->type_name()->str().c_str(),
-              &inputs,
-              &outputs,
-              &params));
-        }
-        graphs.emplace_back(qcir::CreateGraphDirect(
-            builder_, graph->name()->str().c_str(), &nodes, &tensors));
-      }
-    }
-
-    auto context = qcir::CreateContextDirect(builder_, &graphs);
-    builder_.Finish(context);
-    QnnExecuTorchContextBinary qcir_bin(
-        {builder_.GetBufferPointer(), builder_.GetSize()});
-
-    // Init QnnQcirCustomProtocol binary
-    qnn_executorch_context_binary_ =
-        MakeQcirCustomBinaryInfo(qcir_bin, tensor_data);
-    qnn_manager_ = std::make_shared<QnnManager>(
-        qnn_executorch_options, qnn_executorch_context_binary_);
-  }
-
   executorch::runtime::Error Init() {
     return qnn_manager_->Init();
   }
@@ -172,121 +59,108 @@ class PyQnnManager {
     return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
   }

-  // this method is specific for stage 2 of compiling multi-graphs
-  py::array_t<char> Compile() {
-    if (qnn_manager_->CompileQcir() != Error::Ok) {
-      QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir");
-      return py::array_t<char>(0);
-    }
-
-    // generate context binary if compilation succeded
-    QnnExecuTorchContextBinary binary_info;
-    qnn_manager_->GetContextBinary(binary_info);
-    // allocate py::array (to pass the result of the C++ function to Python)
-    auto result = py::array_t<char>(binary_info.nbytes);
-    auto result_buffer = result.request();
-    char* result_ptr = (char*)result_buffer.ptr;
-    std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
-    return result;
-  }
-
   py::array_t<char> Compile(
-      const std::string& graph_name,
-      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
+      const std::vector<std::string>& graph_names,
+      std::vector<std::vector<std::shared_ptr<OpWrapper>>>& op_wrappers) {
     QnnExecuTorchContextBinary binary_info;

-    if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) {
+    if (qnn_manager_->IsOnlinePrepare()) {
       builder_.Reset();
+      std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs;
       std::vector<uint8_t> tensor_data;
       std::vector<uint64_t> offsets;
       std::unordered_map<void*, int> tensor_map;
-      std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
-      std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
-
-      auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
-                            std::vector<uint32_t>& index) {
-        auto it = tensor_map.find(wrapper.get());
-        if (it != tensor_map.end()) {
-          index.push_back(it->second);
-        } else {
-          tensor_map[wrapper.get()] = fb_tensors.size();
-          index.push_back(fb_tensors.size());
-          offsets.push_back(tensor_data.size());
-          Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct();
-          fb_tensors.emplace_back(
-              ToTensor(qnn_tensor, offsets.back(), &builder_));
-          uint8_t* data_ptr = static_cast<uint8_t*>(
-              QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.data);
-          if (data_ptr != nullptr) {
-            tensor_data.insert(
-                tensor_data.end(),
-                data_ptr,
-                data_ptr + QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.dataSize);
+
+      for (int i = 0; i < graph_names.size(); ++i) {
+        std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
+        std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
+
+        auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
+                              std::vector<uint32_t>& index) {
+          auto it = tensor_map.find(wrapper.get());
+          if (it != tensor_map.end()) {
+            index.push_back(it->second);
+          } else {
+            tensor_map[wrapper.get()] = fb_tensors.size();
+            index.push_back(fb_tensors.size());
+            offsets.push_back(tensor_data.size());
+            Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct();
+            fb_tensors.emplace_back(
+                ToTensor(qnn_tensor, offsets.back(), &builder_));
+            uint8_t* data_ptr = static_cast<uint8_t*>(
+                QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.data);
+            if (data_ptr != nullptr) {
+              tensor_data.insert(
+                  tensor_data.end(),
+                  data_ptr,
+                  data_ptr +
+                      QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.dataSize);
+            }
           }
-        }
-      };
+        };

-      for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
-        std::vector<uint32_t> inputs, outputs, params;
+        for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers[i]) {
+          std::vector<uint32_t> inputs, outputs, params;

-        for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
-          set_tensor(tensor_wrapper, inputs);
-        }
+          for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
+            set_tensor(tensor_wrapper, inputs);
+          }

-        for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
-          set_tensor(tensor_wrapper, outputs);
-        }
+          for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
+            set_tensor(tensor_wrapper, outputs);
+          }

-        for (const auto& param : op_wrapper->GetParams()) {
-          auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
-          if (p_tensor_param != nullptr) {
-            auto wrapper = p_tensor_param->GetTensorWrapper();
-            wrapper->SetName(param->GetName());
-            set_tensor(wrapper, params);
-          } else {
-            executorch::runtime::Error err = param->PopulateQnnParam();
-            if (err != executorch::runtime::Error::Ok) {
-              QNN_EXECUTORCH_LOG_ERROR(
-                  "Fail to get scalar parameter in online prepare stage");
-              return py::array_t<char>(0);
+          for (const auto& param : op_wrapper->GetParams()) {
+            auto* p_tensor_param =
+                dynamic_cast<TensorParamWrapper*>(param.get());
+            if (p_tensor_param != nullptr) {
+              auto wrapper = p_tensor_param->GetTensorWrapper();
+              wrapper->SetName(param->GetName());
+              set_tensor(wrapper, params);
+            } else {
+              executorch::runtime::Error err = param->PopulateQnnParam();
+              if (err != executorch::runtime::Error::Ok) {
+                QNN_EXECUTORCH_LOG_ERROR(
+                    "Fail to get scalar parameter in online prepare stage");
+                return py::array_t<char>(0);
+              }
+              Qnn_Param_t p = param->GetQnnParam();
+              Qnn_Tensor_t t(
+                  {.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
+              QNN_TENSOR_VER_PTR(t)->name = p.name;
+              QNN_TENSOR_VER_PTR(t)->dataType = p.scalarParam.dataType;
+              QNN_TENSOR_VER_PTR(t)->clientBuf.data =
+                  static_cast<void*>(&p.scalarParam.uint8Value);
+              QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize =
+                  GetDataTypeSize(QNN_TENSOR_VER_PTR(t)->dataType);
+
+              // collect tensor data
+              offsets.push_back(tensor_data.size());
+              const uint8_t* data_ptr =
+                  static_cast<uint8_t*>(QNN_TENSOR_VER_PTR(t)->clientBuf.data);
+              tensor_data.insert(
+                  tensor_data.end(),
+                  data_ptr,
+                  data_ptr + QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize);
+              params.push_back(fb_tensors.size());
+              fb_tensors.emplace_back(ToTensor(t, offsets.back(), &builder_));
             }
-            Qnn_Param_t p = param->GetQnnParam();
-            Qnn_Tensor_t t(
-                {.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
-            QNN_TENSOR_VER_PTR(t)->name = p.name;
-            QNN_TENSOR_VER_PTR(t)->dataType = p.scalarParam.dataType;
-            QNN_TENSOR_VER_PTR(t)->clientBuf.data =
-                static_cast<void*>(&p.scalarParam.uint8Value);
-            QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize =
-                GetDataTypeSize(QNN_TENSOR_VER_PTR(t)->dataType);
-
-            // collect tensor data
-            offsets.push_back(tensor_data.size());
-            const uint8_t* data_ptr =
-                static_cast<uint8_t*>(QNN_TENSOR_VER_PTR(t)->clientBuf.data);
-            tensor_data.insert(
-                tensor_data.end(),
-                data_ptr,
-                data_ptr + QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize);
-            params.push_back(fb_tensors.size());
-            fb_tensors.emplace_back(ToTensor(t, offsets.back(), &builder_));
           }
-        }

-        Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
-        fb_ops.emplace_back(qcir::CreateOperatorDirect(
-            builder_,
-            QNN_OP_VER_PTR(op_config)->name,
-            QNN_OP_VER_PTR(op_config)->packageName,
-            QNN_OP_VER_PTR(op_config)->typeName,
-            &inputs,
-            &outputs,
-            &params));
+          Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
+          fb_ops.emplace_back(qcir::CreateOperatorDirect(
+              builder_,
+              QNN_OP_VER_PTR(op_config)->name,
+              QNN_OP_VER_PTR(op_config)->packageName,
+              QNN_OP_VER_PTR(op_config)->typeName,
+              &inputs,
+              &outputs,
+              &params));
+        }
+        fb_graphs.emplace_back(qcir::CreateGraphDirect(
+            builder_, graph_names[i].c_str(), &fb_ops, &fb_tensors));
       }

-      std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs(
-          {qcir::CreateGraphDirect(
-              builder_, graph_name.c_str(), &fb_ops, &fb_tensors)});
       auto context = qcir::CreateContextDirect(builder_, &fb_graphs);
       builder_.Finish(context);

@@ -300,10 +174,12 @@ class PyQnnManager {
       std::tie(binary_info.buffer, binary_info.nbytes) =
           custom_qcir_protocol_buffer_.GetCustomProtocolBuffer();
     } else {
-      if (qnn_manager_->Compile(graph_name, op_wrappers) !=
-          executorch::runtime::Error::Ok) {
-        QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
-        return py::array_t<char>(0);
+      for (int i = 0; i < graph_names.size(); ++i) {
+        if (qnn_manager_->Compile(graph_names[i], op_wrappers[i]) !=
+            executorch::runtime::Error::Ok) {
+          QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
+          return py::array_t<char>(0);
+        }
       }
       if (qnn_manager_->GetContextBinary(binary_info) !=
           executorch::runtime::Error::Ok) {

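For callers that relied on the removed two-stage multi-graph path (constructing QnnManager with a list of qcir buffers and then calling the argument-less Compile()), the flow collapses into the single multi-graph Compile call shown earlier. A rough migration sketch with hypothetical variable names, old calls kept as comments:

    # Old two-stage flow (removed by this commit):
    #   manager = PyQnnManagerAdaptor.QnnManager(option_bytes, [qcir_a, qcir_b])
    #   context_binary = manager.Compile()
    #
    # New single-stage flow: every method's graph goes into one Compile call and
    # comes back as a single context binary containing all graphs.
    context_binary = manager.Compile(graph_names, op_wrappers_per_graph)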