 #include "NeuronPayloadHeader.h"
 #include "api/NeuronAdapter.h"

+#include <executorch/runtime/executor/pte_data_map.h>
 #include "executorch/runtime/core/error.h"
-#include "executorch/runtime/core/exec_aten/util/dim_order_util.h"

 #include <algorithm>
 #include <memory>
@@ -24,6 +24,7 @@ namespace executorch {
 namespace backends {
 namespace neuron {

+using executorch::ET_RUNTIME_NAMESPACE::NamedDataMap;
 using executorch::runtime::ArrayRef;
 using executorch::runtime::BackendExecutionContext;
 using executorch::runtime::BackendInitContext;
@@ -37,12 +38,22 @@ using executorch::runtime::Result;

 const char kHighAddrKey[] = "HighAddr";
 const char kImportForeverKey[] = "ImportForever";
+const char kSharedWeightsKey[] = "ExtractSharedBlobKey";

 Result<DelegateHandle*> NeuronBackend::init(
     BackendInitContext& context,
     FreeableBuffer* processed,
     ArrayRef<CompileSpec> compile_specs) const {
   NeuronDelegateSetting setting;
+  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
+  NeuronExecuTorchDelegate* delegate =
+      runtime_allocator->allocateInstance<NeuronExecuTorchDelegate>();
+  if (delegate == nullptr) {
+    return Error::MemoryAllocationFailed;
+  }
+
+  new (delegate) NeuronExecuTorchDelegate();
+
   for (auto& compile_spec : compile_specs) {
     if (std::strcmp(compile_spec.key, kHighAddrKey) == 0) {
       setting.mHighAddr = *static_cast<char*>(compile_spec.value.buffer);
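Note: the delegate is now allocated before the compile-spec loop so that the shared-weights branch added below can hand a buffer to it while the specs are still being parsed. As a side note, here is a minimal standalone sketch of the allocate-then-construct pattern used above, assuming an allocator in the style of ExecuTorch's MemoryAllocator::allocateInstance<T>(), which returns raw, uninitialized storage; the helper name is illustrative only.

#include <new>  // placement new

// Sketch only: get raw storage from the runtime allocator, then construct the
// object in place. The allocator owns the storage, so the object must not be
// released with `delete`; the backend's destroy path is responsible for
// running the destructor if one is needed.
template <typename T, typename Allocator>
T* AllocateAndConstruct(Allocator* allocator) {
  T* storage = allocator->template allocateInstance<T>();
  if (storage == nullptr) {
    return nullptr;  // caller maps this to Error::MemoryAllocationFailed
  }
  return new (storage) T();  // placement new into pre-allocated memory
}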
@@ -53,11 +64,39 @@ Result<DelegateHandle*> NeuronBackend::init(
           "NeuronBackend",
           "IsImportForever Enable : %d",
           setting.mImportForever);
+    } else if (std::strcmp(compile_spec.key, kSharedWeightsKey) == 0) {
+      setting.mSharedWeights = true;
+      std::string shared_weights_key(
+          static_cast<char*>(compile_spec.value.buffer),
+          compile_spec.value.nbytes);
+      LogInfo(
+          "NeuronBackend",
+          "SharedWeights Enabled for %s",
+          shared_weights_key.c_str());
+
+      const NamedDataMap* named_data_map = context.get_named_data_map();
+      Result<FreeableBuffer> shared_weights =
+          named_data_map->get_data(shared_weights_key.c_str());
+
+      if (shared_weights.ok()) {
+        LogInfo(
+            "NeuronBackend",
+            "Loaded shared weights from named_data_map. Size: %zu",
+            shared_weights.get().size());
+        FreeableBuffer& buffer = shared_weights.get();
+        delegate->SetSharedWeights(buffer);
+      } else {
+        LogError(
+            "NeuronBackend",
+            "Failed to load shared weights from named_data_map.");
+        return Error::Internal;
+      }
     } else {
       LogWarn("NeuronBackend", "unknown compile spec: %s", compile_spec.key);
     }
   }
   auto Payload = NeuronPayload(processed->data(), processed->size());
+
   LogInfo(
       "NeuronBackend",
       "version %u, input %u, output %u, length %u, payload size: %zu",
@@ -67,19 +106,7 @@ Result<DelegateHandle*> NeuronBackend::init(
       Payload.Header.DataLen,
       processed->size());

-  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
-  NeuronExecuTorchDelegate* delegate =
-      runtime_allocator->allocateInstance<NeuronExecuTorchDelegate>();
-  if (delegate == nullptr) {
-    return Error::MemoryAllocationFailed;
-  }
-
-  new (delegate) NeuronExecuTorchDelegate();
-
-  if (delegate == nullptr) {
-    return nullptr;
-  }
-  auto res = delegate->LoadCompiledNetwork(Payload, setting);
+  int res = delegate->LoadCompiledNetwork(Payload, setting);
   return res == NEURON_NO_ERROR ? delegate : nullptr;
 }

@@ -111,21 +138,25 @@ Error NeuronExecuTorchDelegate::execute(
     return Error::InvalidState;
   };

+  ET_CHECK_OR_RETURN_ERROR(
+      CheckDimOrder(args) == NEURON_NO_ERROR,
+      Internal,
+      "Expecting default dim_order but got a non default dim_order tensor input");
+
+  PrepareInputsOuputs(args);
+
   auto allocator = dynamic_cast<torch::executor::neuron::BufferAllocator*>(
       context.get_temp_allocator());
-  size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
+
+  bool has_shared_weights_input = neuron_shared_weights_.size() > 0;
+
+  size_t inputCount =
+      has_shared_weights_input ? mInputSizes.size() + 1 : mInputSizes.size();
+  size_t outputCount = mOutputSizes.size();

   for (int i = 0; i < inputCount; i++) {
-    auto tensor_in = args[i]->toTensor();
-    ET_CHECK_OR_RETURN_ERROR(
-        runtime::is_contiguous_dim_order(
-            tensor_in.dim_order().data(), tensor_in.dim()),
-        Internal,
-        "Expecting default dim_order but got a non default dim_order tensor for external input %u",
-        i);
-
-    auto data_ptr = args[i]->toTensor().data_ptr();
-    auto data_size = args[i]->toTensor().nbytes();
+    auto data_ptr = mPreparedInputs[i].data_ptr;
+    auto data_size = mPreparedInputs[i].size;
     if (IsCached</*isInput=*/true>(i, data_ptr)) {
       continue;
     };
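The per-tensor dim-order check that used to sit inside the input loop (removed above) is now hoisted into a single CheckDimOrder(args) call, and buffer pointers and sizes come from mPreparedInputs filled by PrepareInputsOuputs(args). Those helpers are defined elsewhere in this backend; a plausible shape for the check, inferred purely from the removed loop body (the signature and the error code used here are assumptions), would be:

// Inferred sketch: reject any external input tensor whose dim_order is not the
// default contiguous one, mirroring the runtime::is_contiguous_dim_order()
// check that was removed from the loop. Requires
// executorch/runtime/core/exec_aten/util/dim_order_util.h.
int NeuronExecuTorchDelegate::CheckDimOrder(EValue** args) const {
  for (size_t i = 0; i < mInputSizes.size(); i++) {
    auto tensor_in = args[i]->toTensor();
    if (!executorch::runtime::is_contiguous_dim_order(
            tensor_in.dim_order().data(), tensor_in.dim())) {
      return NEURON_BAD_DATA;  // any non-zero NeuronAdapter error code would do
    }
  }
  return NEURON_NO_ERROR;
}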
@@ -140,22 +171,20 @@
     }
   }

-  for (int o = inputCount; o < inputCount + outputCount; o++) {
-    auto data_ptr = args[o]->toTensor().data_ptr();
-    auto data_size = args[o]->toTensor().nbytes();
-    auto output_index = o - inputCount;
-    if (IsCached</*isInput=*/false>(output_index, data_ptr)) {
+  for (int o = 0; o < outputCount; o++) {
+    auto data_ptr = mPreparedOutputs[o].data_ptr;
+    auto data_size = mPreparedOutputs[o].size;
+    if (IsCached</*isInput=*/false>(o, data_ptr)) {
       continue;
     };
     auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
     if (unit) {
-      UpdateCache</*isInput=*/false>(output_index, data_ptr);
+      UpdateCache</*isInput=*/false>(o, data_ptr);
       size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
       mExecutor.SetInputOutputFromMemory</*isInput*/ false>(
-          output_index, unit->GetNeuronMemory(), offset, data_size);
+          o, unit->GetNeuronMemory(), offset, data_size);
     } else {
-      mExecutor.SetInputOutput</*isInput=*/false>(
-          output_index, data_ptr, data_size);
+      mExecutor.SetInputOutput</*isInput=*/false>(o, data_ptr, data_size);
     }
   }

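Both the input and output loops end with the same two-way binding decision. A compact sketch of that decision, using only the calls that appear above (the Executor template parameter stands in for whatever type mExecutor actually has; the helper itself is illustrative, not part of the change):

// Sketch of the binding choice made per argument above: if the data pointer
// falls inside a region tracked by the Neuron BufferAllocator, bind it as
// (NeuronMemory, offset) so the driver can consume the shared memory directly;
// otherwise fall back to a plain host pointer.
template <bool kIsInput, typename Executor>
void BindArg(
    torch::executor::neuron::BufferAllocator* allocator,
    Executor& executor,
    size_t index,
    void* data_ptr,
    size_t data_size) {
  auto* unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
  if (unit != nullptr) {
    size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
    executor.template SetInputOutputFromMemory<kIsInput>(
        index, unit->GetNeuronMemory(), offset, data_size);
  } else {
    executor.template SetInputOutput<kIsInput>(index, data_ptr, data_size);
  }
}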