diff --git a/include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h b/include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h index 1da88b1f07254..e8e1ecb1a0396 100644 --- a/include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h +++ b/include/onnxruntime/core/providers/nnapi/nnapi_provider_factory.h @@ -19,9 +19,25 @@ enum NNAPIFlags { // Please note for now, NNAPI perform worse using NCHW compare to using NHWC NNAPI_FLAG_USE_NCHW = 0x002, + // Prevent NNAPI from using CPU devices. + // + // NNAPI is more efficient using GPU or NPU for execution, and NNAPI might fall back to its own CPU implementation + // for operations not supported by GPU/NPU. The CPU implementation of NNAPI (which is called nnapi-reference) + // might be less efficient than the optimized versions of the operation of ORT. It might be advantageous to disable + // the NNAPI CPU fallback and handle execution using ORT kernels. + // + // For some models, if NNAPI would use CPU to execute an operation, and this flag is set, the execution of the + // model may fall back to ORT kernels. 
+ // + // This option is only available on Android API level 29 and higher, and will be ignored on Android API level 28 and lower + // + // For NNAPI device assignments, see https://developer.android.com/ndk/guides/neuralnetworks#device-assignment + // For NNAPI CPU fallback, see https://developer.android.com/ndk/guides/neuralnetworks#cpu-fallback + NNAPI_FLAG_CPU_DISABLED = 0x004, + // Keep NNAPI_FLAG_MAX at the end of the enum definition // And assign the last NNAPIFlag to it - NNAPI_FLAG_LAST = NNAPI_FLAG_USE_NCHW, + NNAPI_FLAG_LAST = NNAPI_FLAG_CPU_DISABLED, }; #ifdef __cplusplus diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index ead5a7eb1ba21..3a7f9c1dc7859 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -87,17 +87,23 @@ Status ModelBuilder::GetTargetDevices() { for (uint32_t i = 0; i < num_devices; i++) { ANeuralNetworksDevice* device = nullptr; const char* device_name = nullptr; + int32_t device_type; RETURN_STATUS_ON_ERROR_WITH_NOTE( nnapi_->ANeuralNetworks_getDevice(i, &device), "Getting " + std::to_string(i) + "th device"); RETURN_STATUS_ON_ERROR_WITH_NOTE(nnapi_->ANeuralNetworksDevice_getName(device, &device_name), "Getting " + std::to_string(i) + "th device's name"); + RETURN_STATUS_ON_ERROR_WITH_NOTE(nnapi_->ANeuralNetworksDevice_getType(device, &device_type), + "Getting " + std::to_string(i) + "th device's type"); + bool device_is_cpu = nnapi_cpu == device_name; if ((target_device_option_ == TargetDeviceOption::CPU_DISABLED && !device_is_cpu) || (target_device_option_ == TargetDeviceOption::CPU_ONLY && device_is_cpu)) { nnapi_target_devices_.push_back(device); - LOGS_DEFAULT(VERBOSE) << "Target device [" << device_name << "] added"; + const auto device_detail = MakeString("[Name: [", device_name, "], Type [", device_type, "]], "); + 
nnapi_target_devices_detail_ += device_detail; + LOGS_DEFAULT(VERBOSE) << "Target device " << device_detail << " is added"; } } @@ -489,6 +495,7 @@ Status ModelBuilder::AddOperation(int op, const std::vector& input_ind output_indices.size(), &output_indices[0]), "op = " + std::to_string(op)); + num_nnapi_ops_++; return Status::OK(); } @@ -515,7 +522,38 @@ Status ModelBuilder::Compile(std::unique_ptr& model) { nnapi_->ANeuralNetworksModel_finish(nnapi_model_->model_), "on model finish"); + // We have a list of target devices, try to see if the model can be run entirely + // using the list of target devices + // This is only available on API 29+, for API 28- the nnapi_target_devices_ will + // be empty so we will not check API level here, see GetTargetDevices() + bool use_create_for_devices = false; if (!nnapi_target_devices_.empty()) { + std::unique_ptr supported_ops_holder = onnxruntime::make_unique(num_nnapi_ops_); + auto* supported_ops = supported_ops_holder.get(); + RETURN_STATUS_ON_ERROR_WITH_NOTE( + nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices( + nnapi_model_->model_, nnapi_target_devices_.data(), + nnapi_target_devices_.size(), supported_ops), + "on getSupportedOperationsForDevices"); + + bool all_ops_supported = std::all_of(supported_ops, supported_ops + num_nnapi_ops_, + [](bool is_supported) { return is_supported; }); + if (!all_ops_supported) { + // There are some ops not supported by the list of the target devices + // Fail the Compile + // + // TODO: add some logic to not fail for some cases + // For example, if there is an acceptable fallback to CPU (nnapi-reference) + // and CPU is not in the target devices list + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "The model cannot run using current set of target devices, ", + nnapi_target_devices_detail_); + } else { + use_create_for_devices = true; + } + } + + if (use_create_for_devices) { RETURN_STATUS_ON_ERROR_WITH_NOTE( + nnapi_->ANeuralNetworksCompilation_createForDevices( 
nnapi_model_->model_, nnapi_target_devices_.data(), diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index 86882c8e7c975..ee2ac944b6327 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -22,9 +22,12 @@ class ModelBuilder { enum class TargetDeviceOption : int8_t { ALL_DEVICES, // use all avaliable target devices - /* TODO support this option - SINGLE_DEVICE, // use a single target device, must be given + + /* TODO support these options + PREFERRED_DEVICES, // Use one or more preferred devices (must be given) + EXCLUDED_DEVICES, // Exclude one or more devices (must be given) */ + CPU_DISABLED, // use all available target devices except CPU CPU_ONLY, // use CPU only }; @@ -74,6 +77,8 @@ class ModelBuilder { // It is off by default void SetUseFp16(bool use_fp16) { use_fp16_ = use_fp16; } + void SetTargetDeviceOption(TargetDeviceOption option) { target_device_option_ = option; } + // Set NNAPI execution preference // Default preference is PREFER_SUSTAINED_SPEED void ExecutePreference( @@ -148,7 +153,10 @@ class ModelBuilder { TargetDeviceOption target_device_option_{TargetDeviceOption::ALL_DEVICES}; std::vector nnapi_target_devices_; + std::string nnapi_target_devices_detail_; // Debug info for target devices + // The number of nnapi operations in this model + size_t num_nnapi_ops_ = 0; uint32_t next_index_ = 0; // Convert the onnx model to ANeuralNetworksModel diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc index e959a24c2b95a..f1dfe7b03a47d 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc @@ -256,6 +256,10 @@
common::Status NnapiExecutionProvider::Compile(const std::vector nnapi_model; ORT_RETURN_IF_ERROR(builder.Compile(nnapi_model));