diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md index 1ba01a659c4..18024e0ea93 100644 --- a/backends/qualcomm/README.md +++ b/backends/qualcomm/README.md @@ -56,7 +56,7 @@ backends/qualcomm | | # Meanwhile, this is also the runtime responsbile for executing compiled | | # models on a device. | └── backends # Backends supported by QNN. -| └── htpbackend +| └── gpu / htp | ├── aarch64 # Configuration required to run on device. (Device Part). | └── x86_64 # Configuration required to compile graph on host. (AoT Part). ├── scripts # Misc supporting scripts, not related to core functionality. diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index 6a44f3234c5..9c43a6b0c2a 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -43,58 +43,70 @@ target_sources( ${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp ) -# qnn_device -set(HOST_ARCHITECTURE - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR} +set(HOST_ARCHITECTURE_GPU + ${CMAKE_CURRENT_LIST_DIR}/gpu/${CMAKE_SYSTEM_PROCESSOR} +) +set(HOST_ARCHITECTURE_HTP + ${CMAKE_CURRENT_LIST_DIR}/htp/${CMAKE_SYSTEM_PROCESSOR} ) +set(HOST_ARCHITECTURE_IR ${CMAKE_CURRENT_LIST_DIR}/ir/${CMAKE_SYSTEM_PROCESSOR}) +# qnn_device target_sources( qnn_device PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuDevice.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevicePlatformInfoConfig.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDeviceCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevicePlatformInfoConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDeviceCustomConfig.h # When offline prepare context cache in x86 
host we have to provide # platform infomation and SocModel to Qnn - ${HOST_ARCHITECTURE}/HtpDevicePlatformInfoConfig.cpp - ${HOST_ARCHITECTURE}/HtpDeviceCustomConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpDevicePlatformInfoConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpDeviceCustomConfig.cpp ) # qnn_context target_sources( qnn_context PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h - ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h - PRIVATE - ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h - ${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp - ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.h + ${CMAKE_CURRENT_LIST_DIR}/ir/IrContext.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.h + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContextCustomConfig.h + ${HOST_ARCHITECTURE_GPU}/GpuContextCustomConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpContextCustomConfig.cpp + ${HOST_ARCHITECTURE_IR}/IrContext.cpp ) # qnn_backend_cache target_sources( qnn_backend_cache PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.cpp ) # qnn_graph target_sources( qnn_graph PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.cpp - 
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.cpp - ${HOST_ARCHITECTURE}/HtpGraphCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpGraphCustomConfig.cpp ) # qnn_op_package_manager @@ -108,9 +120,13 @@ target_sources( target_sources( qnn_backend PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h - ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackend.h + ${CMAKE_CURRENT_LIST_DIR}/ir/IrBackend.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.cpp ) # qnn_mem_manager @@ -138,6 +154,5 @@ target_sources( target_sources( qnn_dlc_manager PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h - PRIVATE - ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp + PRIVATE ${HOST_ARCHITECTURE_IR}/QnnDlcManager.cpp ) diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index e7e9db6fed8..10916d20532 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -53,8 +53,16 @@ std::unique_ptr QnnBackendFactory::Create( EnumNameQnnExecuTorchHtpPdSession(htp_options->pd_session())); QNN_EXECUTORCH_LOG_INFO( "use_conv_hmx in htp_options: %d", htp_options->use_conv_hmx()); + 
QNN_EXECUTORCH_LOG_INFO( + "use_dlbc in htp_options: %d", htp_options->use_dlbc()); QNN_EXECUTORCH_LOG_INFO( "use_fold_relu in htp_options: %d", htp_options->use_fold_relu()); + QNN_EXECUTORCH_LOG_INFO( + "use_multi_contexts in htp_options: %d", + htp_options->use_multi_contexts()); + QNN_EXECUTORCH_LOG_INFO( + "use_weight_sharing in htp_options: %d", + htp_options->use_weight_sharing()); } backend_params->qnn_backend_ptr_ = std::make_unique(implementation, logger); @@ -86,13 +94,66 @@ std::unique_ptr QnnBackendFactory::Create( get_option(options->log_level())); backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED; } break; - case QnnExecuTorchBackendType::kGpuBackend: + case QnnExecuTorchBackendType::kGpuBackend: { + auto gpu_options = options->backend_options()->gpu_options(); + if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) { + QNN_EXECUTORCH_LOG_INFO( + "performance_mode in gpu_options: %s", + EnumNameQnnExecuTorchGpuPerformanceMode( + gpu_options->performance_mode())); + QNN_EXECUTORCH_LOG_INFO( + "precision in gpu_options: %s", + EnumNameQnnExecuTorchGpuPrecision(gpu_options->precision())); + QNN_EXECUTORCH_LOG_INFO( + "use_memory_optimizations in gpu_options: %d", + gpu_options->use_memory_optimizations()); + QNN_EXECUTORCH_LOG_INFO( + "use_node_optimizations in gpu_options: %d", + gpu_options->use_node_optimizations()); + QNN_EXECUTORCH_LOG_INFO( + "use_queue_recording in gpu_options: %d", + gpu_options->use_queue_recording()); + QNN_EXECUTORCH_LOG_INFO( + "use_weight_sharing in gpu_options: %d", + gpu_options->use_weight_sharing()); + } + backend_params->qnn_backend_ptr_ = + std::make_unique(implementation, logger, gpu_options); + + backend_params->qnn_device_ptr_ = + std::make_unique(implementation, logger); + + backend_params->qnn_backend_cache_ptr_ = + std::make_unique(qnn_context_blob); + + backend_params->qnn_context_ptr_ = std::make_unique( + implementation, + backend_params->qnn_backend_ptr_.get(), + 
backend_params->qnn_device_ptr_.get(), + backend_params->qnn_backend_cache_ptr_.get(), + qnn_dlc_manager, + gpu_options); + + backend_params->qnn_graph_ptr_ = std::make_unique( + implementation, + backend_params->qnn_backend_ptr_.get(), + backend_params->qnn_context_ptr_.get(), + options->profile_level(), + gpu_options); + } break; case QnnExecuTorchBackendType::kDspBackend: case QnnExecuTorchBackendType::kUndefinedBackend: default: return nullptr; } + backend_params->qnn_mem_manager_ptr_ = std::make_unique( + implementation, + backend_params->qnn_context_ptr_.get(), + options->log_level()); + + backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED; + if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) { return backend_params; } diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h index 3d78a36b9f0..c6c112ccf2c 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.h @@ -17,11 +17,15 @@ #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace executorch { diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h index 85de00f8623..f0f1b5b0fbd 100644 --- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h +++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h @@ -29,7 +29,7 @@ class QnnDevice { return handle_; } - executorch::runtime::Error Configure(); + virtual executorch::runtime::Error Configure(); protected: virtual executorch::runtime::Error MakeConfig( diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h index a57906df4e3..940c73e518a 100644 --- a/backends/qualcomm/runtime/backends/QnnDlcManager.h +++ 
b/backends/qualcomm/runtime/backends/QnnDlcManager.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include "QnnWrapperUtils.hpp" namespace executorch { diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp b/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp new file mode 100644 index 00000000000..2332193d30d --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include "GPU/QnnGpuCommon.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +GpuBackend::GpuBackend( + const QnnImplementation& implementation, + QnnLogger* logger, + const QnnExecuTorchGpuBackendOptions* gpu_options) + : QnnBackend(implementation, logger) { + gpu_backend_custom_config_ = + std::make_unique(gpu_options); +} + +Qnn_Version_t GpuBackend::GetExpectedBackendVersion() const { + Qnn_Version_t backend_version; + backend_version.major = QNN_GPU_API_VERSION_MAJOR; + backend_version.minor = QNN_GPU_API_VERSION_MINOR; + backend_version.patch = QNN_GPU_API_VERSION_PATCH; + return backend_version; +} + +bool GpuBackend::IsProfileEventTypeParentOfNodeTime( + QnnProfile_EventType_t event_type) { + return (event_type == QNN_PROFILE_EVENTTYPE_EXECUTE); +} + +Error GpuBackend::MakeConfig(std::vector& config) { + const std::vector& backend_custom_config = + gpu_backend_custom_config_->CreateBackendCustomConfig(); + + uint32_t num_custom_configs = backend_custom_config.size(); + backend_config_.resize(num_custom_configs); + // +1 for null terminated + config.reserve(num_custom_configs + 1); + + for (std::size_t i = 0; i < num_custom_configs; ++i) { + backend_config_[i].option = QNN_BACKEND_CONFIG_OPTION_CUSTOM; + backend_config_[i].customConfig = 
backend_custom_config[i]; + config.push_back(&backend_config_[i]); + } + + config.push_back(nullptr); + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackend.h b/backends/qualcomm/runtime/backends/gpu/GpuBackend.h new file mode 100644 index 00000000000..f0a2de2fc8c --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackend.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class GpuBackend : public QnnBackend { + public: + GpuBackend( + const QnnImplementation& implementation, + QnnLogger* logger, + const QnnExecuTorchGpuBackendOptions* gpu_options); + + Qnn_Version_t GetExpectedBackendVersion() const override; + + bool IsProfileEventTypeParentOfNodeTime( + QnnProfile_EventType_t event_type) override; + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector backend_config_; + std::unique_ptr gpu_backend_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp new file mode 100644 index 00000000000..60e289493d0 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +GpuBackendCustomConfig::GpuBackendCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options) + : gpu_options_(gpu_options) {} + +QnnGpuBackend_CustomConfig_t* +GpuBackendCustomConfig::AllocBackendCustomConfig() { + gpu_backend_config_.emplace_back( + std::make_unique()); + gpu_backend_config_.back()->option = QNN_GPU_BACKEND_CONFIG_OPTION_UNDEFINED; + return gpu_backend_config_.back().get(); +} + +std::vector +GpuBackendCustomConfig::CreateBackendCustomConfig() { + std::vector ret; + QnnGpuBackend_CustomConfig_t* p_custom_config = nullptr; + + if (gpu_options_->use_weight_sharing()) { + p_custom_config = AllocBackendCustomConfig(); + p_custom_config->option = + QNN_GPU_BACKEND_CONFIG_OPTION_WEIGHT_SHARING_ENABLED; + p_custom_config->weightSharingEnabled = 1; + ret.push_back(static_cast(p_custom_config)); + } + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h new file mode 100644 index 00000000000..150235a82e6 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "GPU/QnnGpuBackend.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class GpuBackendCustomConfig { + public: + explicit GpuBackendCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options); + + std::vector CreateBackendCustomConfig(); + + private: + QnnGpuBackend_CustomConfig_t* AllocBackendCustomConfig(); + std::vector> + gpu_backend_config_; + const QnnExecuTorchGpuBackendOptions* gpu_options_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp new file mode 100644 index 00000000000..d3816fc560e --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +GpuContext::GpuContext( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnDevice* device, + QnnBackendCache* cache, + QnnDlcManager* qnn_dlc_manager, + const QnnExecuTorchGpuBackendOptions* gpu_options) + : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) { + gpu_context_custom_config_ = + std::make_unique(gpu_options); +} + +Error GpuContext::MakeConfig(std::vector& config) { + const std::vector& context_custom_config = + gpu_context_custom_config_->CreateContextCustomConfig(); + + uint32_t num_custom_configs = context_custom_config.size(); + context_config_.resize(num_custom_configs); + // +1 for null terminated + config.reserve(num_custom_configs + 1); + + for (std::size_t i = 0; i < num_custom_configs; ++i) { + context_config_[i].option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM; + context_config_[i].customConfig = context_custom_config[i]; + config.push_back(&context_config_[i]); + } + + config.push_back(nullptr); + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.h b/backends/qualcomm/runtime/backends/gpu/GpuContext.h new file mode 100644 index 00000000000..873117c0e50 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class QnnDlcManager; +class GpuContext : public QnnContext { + public: + GpuContext( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnDevice* device, + QnnBackendCache* cache, + QnnDlcManager* qnn_dlc_manager, + const QnnExecuTorchGpuBackendOptions* gpu_options); + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector context_config_; + std::unique_ptr gpu_context_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h new file mode 100644 index 00000000000..8a1f635bee0 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "GPU/QnnGpuContext.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class GpuContextCustomConfig { + public: + explicit GpuContextCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options) + : gpu_options_(gpu_options) {} + + std::vector CreateContextCustomConfig(); + + private: + QnnGpuContext_CustomConfig_t* AllocContextCustomConfig() { + gpu_context_config_.emplace_back( + std::make_unique()); + gpu_context_config_.back()->option = + QNN_GPU_CONTEXT_CONFIG_OPTION_UNDEFINED; + return gpu_context_config_.back().get(); + } + std::vector> + gpu_context_config_; + [[maybe_unused]] const QnnExecuTorchGpuBackendOptions* gpu_options_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuDevice.h b/backends/qualcomm/runtime/backends/gpu/GpuDevice.h new file mode 100644 index 00000000000..20d6568ecc3 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuDevice.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class GpuDevice : public QnnDevice { + public: + GpuDevice(const QnnImplementation& implementation, QnnLogger* logger) + : QnnDevice(implementation, logger){}; + + // GPU backend does not support device creation + executorch::runtime::Error Configure() override { + return executorch::runtime::Error::Ok; + } +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp b/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp new file mode 100644 index 00000000000..d626ac47c7d --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +GpuGraph::GpuGraph( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnContext* context, + const QnnExecuTorchProfileLevel& profile_level, + const QnnExecuTorchGpuBackendOptions* gpu_options) + : QnnGraph(implementation, backend, context, profile_level) { + gpu_graph_custom_config_ = + std::make_unique(gpu_options); +} + +Error GpuGraph::MakeConfig(std::vector& config) { + const std::vector& graph_custom_config = + gpu_graph_custom_config_->CreateGraphCustomConfig(); + + uint32_t num_custom_configs = graph_custom_config.size(); + graph_config_.resize(num_custom_configs); + // +1 for null terminated + config.reserve(num_custom_configs + 1); + + for (std::size_t i = 0; i < num_custom_configs; ++i) { + graph_config_[i].option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_config_[i].customConfig = graph_custom_config[i]; + config.push_back(&graph_config_[i]); + } + + 
config.push_back(nullptr); + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraph.h b/backends/qualcomm/runtime/backends/gpu/GpuGraph.h new file mode 100644 index 00000000000..c2b5bf2832d --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraph.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class GpuGraph : public QnnGraph { + public: + GpuGraph( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnContext* context, + const QnnExecuTorchProfileLevel& profile_level, + const QnnExecuTorchGpuBackendOptions* gpu_options); + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector graph_config_; + std::unique_ptr gpu_graph_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp new file mode 100644 index 00000000000..17f094db805 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +GpuGraphCustomConfig::GpuGraphCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options) + : gpu_options_(gpu_options) {} + +QnnGpuGraph_CustomConfig_t* GpuGraphCustomConfig::AllocGraphCustomConfig() { + gpu_graph_config_.emplace_back( + std::make_unique()); + return gpu_graph_config_.back().get(); +} + +std::vector +GpuGraphCustomConfig::CreateGraphCustomConfig() { + std::vector ret; + QnnGpuGraph_CustomConfig_t* p_custom_config = nullptr; + + p_custom_config = AllocGraphCustomConfig(); + p_custom_config->precision = + static_cast(gpu_options_->precision()); + p_custom_config->disableMemoryOptimizations = + !gpu_options_->use_memory_optimizations(); + p_custom_config->disableNodeOptimizations = + !gpu_options_->use_node_optimizations(); + p_custom_config->disableQueueRecording = !gpu_options_->use_queue_recording(); + ret.push_back(static_cast(p_custom_config)); + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h new file mode 100644 index 00000000000..a47cd1a3345 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "GPU/QnnGpuGraph.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class GpuGraphCustomConfig { + public: + explicit GpuGraphCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options); + + std::vector CreateGraphCustomConfig(); + + private: + QnnGpuGraph_CustomConfig_t* AllocGraphCustomConfig(); + std::vector> gpu_graph_config_; + const QnnExecuTorchGpuBackendOptions* gpu_options_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp new file mode 100644 index 00000000000..b4f200897ba --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +GpuContextCustomConfig::CreateContextCustomConfig() { + std::vector ret; + QnnGpuContext_CustomConfig_t* p_custom_config = nullptr; + + p_custom_config = AllocContextCustomConfig(); + p_custom_config->option = QNN_GPU_CONTEXT_CONFIG_OPTION_PERF_HINT; + p_custom_config->perfHint = + static_cast(gpu_options_->performance_mode()); + ret.push_back(static_cast(p_custom_config)); + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp new file mode 100644 index 00000000000..69784c1797f --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +GpuContextCustomConfig::CreateContextCustomConfig() { + return {}; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h b/backends/qualcomm/runtime/backends/htp/HtpBackend.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h rename to backends/qualcomm/runtime/backends/htp/HtpBackend.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp similarity index 96% rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp rename to backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp index 030b5666daf..3038a100d03 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include +#include #include "HTP/QnnHtpSystemContext.h" namespace executorch { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h rename to backends/qualcomm/runtime/backends/htp/HtpBackendCache.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp b/backends/qualcomm/runtime/backends/htp/HtpContext.cpp similarity index 94% rename from backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp rename to backends/qualcomm/runtime/backends/htp/HtpContext.cpp index 50d299b55e9..0056a2c0917 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpContext.cpp @@ -7,7 +7,7 @@ */ #include -#include +#include #include "HTP/QnnHtpCommon.h" diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htp/HtpContext.h similarity index 94% rename from backends/qualcomm/runtime/backends/htpbackend/HtpContext.h rename to backends/qualcomm/runtime/backends/htp/HtpContext.h index 88660db080a..ff937593434 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h +++ b/backends/qualcomm/runtime/backends/htp/HtpContext.h @@ -10,7 +10,7 @@ #include #include -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp b/backends/qualcomm/runtime/backends/htp/HtpDevice.cpp similarity index 99% rename from 
backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp rename to backends/qualcomm/runtime/backends/htp/HtpDevice.cpp index 35a20048fc5..75809383ccd 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpDevice.cpp @@ -7,7 +7,7 @@ */ #include -#include +#include #include "HTP/QnnHtpCommon.h" #include "Saver/QnnSaverCommon.h" diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h b/backends/qualcomm/runtime/backends/htp/HtpDevice.h similarity index 92% rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h rename to backends/qualcomm/runtime/backends/htp/HtpDevice.h index 9052deb6b52..82eb8aab9ab 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h +++ b/backends/qualcomm/runtime/backends/htp/HtpDevice.h @@ -9,8 +9,8 @@ #include #include -#include -#include +#include +#include #include #include "HTP/QnnHtpDevice.h" @@ -38,7 +38,7 @@ class HtpDevice : public QnnDevice { } ~HtpDevice(); - // Defines Qnn performance mode vote types for htpbackend + // Defines Qnn performance mode vote types for htp enum PerformanceModeVoteType { kNoVote = 0, kUpVote = 1, diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h b/backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp b/backends/qualcomm/runtime/backends/htp/HtpGraph.cpp similarity 
index 93% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp rename to backends/qualcomm/runtime/backends/htp/HtpGraph.cpp index 29dcf0a58c3..6208febe61a 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpGraph.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h b/backends/qualcomm/runtime/backends/htp/HtpGraph.h similarity index 93% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h rename to backends/qualcomm/runtime/backends/htp/HtpGraph.h index c3add50d08b..db24a64cdfd 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h +++ b/backends/qualcomm/runtime/backends/htp/HtpGraph.h @@ -8,7 +8,7 @@ #pragma once #include -#include +#include #include diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp similarity index 97% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp index d43f8320285..17b8438880d 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp @@ -7,7 +7,7 @@ */ #include #include -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h diff --git 
a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp similarity index 87% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp index 04a5d844dd0..676795797f8 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp @@ -6,8 +6,8 @@ * LICENSE file in the root directory of this source tree. */ -#include -#include +#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp similarity index 84% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp index 81ac4a14372..8207f5071ba 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp similarity index 83% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp index c191791fa63..91221a78fd6 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp similarity index 85% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp index 096fda7b059..faac23edc12 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp similarity index 90% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp index 1fc2940eaa7..4850afa14a2 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp similarity index 90% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp index 154433c10b0..9afbf489bc1 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp similarity index 96% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp index b025f0b2aa6..15c677e8a68 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ #include -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp similarity index 85% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp index 330ca43e20b..ec01f2bbfdd 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/ir/IrBackend.h similarity index 100% rename from backends/qualcomm/runtime/backends/irbackend/IrBackend.h rename to backends/qualcomm/runtime/backends/ir/IrBackend.h diff --git a/backends/qualcomm/runtime/backends/irbackend/IrContext.h b/backends/qualcomm/runtime/backends/ir/IrContext.h similarity index 100% rename from backends/qualcomm/runtime/backends/irbackend/IrContext.h rename to backends/qualcomm/runtime/backends/ir/IrContext.h diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp b/backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp similarity index 88% rename from backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp rename to backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp index 44ce8de8f46..12a27b19ccd 100644 --- a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp +++ b/backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/aarch64/QnnDlcManager.cpp similarity index 100% rename from backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp rename to backends/qualcomm/runtime/backends/ir/aarch64/QnnDlcManager.cpp diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp b/backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp similarity index 94% rename from backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp rename to backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp index f167aae9319..cf5df3de8e9 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp +++ 
b/backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp similarity index 98% rename from backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp rename to backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp index 280751cf160..7190dba0236 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp @@ -7,7 +7,7 @@ */ #include #include -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/targets.bzl b/backends/qualcomm/runtime/targets.bzl index db3706ba221..85cece2bae7 100644 --- a/backends/qualcomm/runtime/targets.bzl +++ b/backends/qualcomm/runtime/targets.bzl @@ -44,10 +44,12 @@ def define_common_targets(): [ "*.cpp", "backends/*.cpp", - "backends/irbackend/*.cpp", - "backends/htpbackend/*.cpp", - ] + (["backends/htpbackend/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/htpbackend/aarch64/*.cpp"]) + ( - ["backends/irbackend/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/irbackend/aarch64/*.cpp"] + "backends/gpu/*.cpp", + "backends/htp/*.cpp", + "backends/ir/*.cpp", + ] + (["backends/gpu/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/gpu/aarch64/*.cpp"]) + ( + ["backends/htp/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/htp/aarch64/*.cpp"]) + ( + ["backends/ir/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/ir/aarch64/*.cpp"] ), exclude = ["Logging.cpp"], ), @@ -55,8 +57,9 @@ def define_common_targets(): [ "*.h", "backends/*.h", - "backends/irbackend/*.h", - "backends/htpbackend/*.h", + "backends/gpu/*.h", + "backends/htp/*.h", + "backends/ir/*.h", ], exclude = ["Logging.h"], ), diff --git 
a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 9a24b95d261..548b2e5e80e 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -61,6 +61,50 @@ table SocInfo { htp_info:HtpInfo; } +/// Defines performance modes available for GPU backend. +enum QnnExecuTorchGpuPerformanceMode: int { + kGpuPerfHintHigh = 0, + kGpuPerfHintNormal, + kGpuPerfHintLow, +} + +/// Defines the optimization levels of the graph tensors that are not input nor +/// output tensors. This enum controls the trade-off between performance and +/// accuracy. +enum QnnExecuTorchGpuPrecision: int { + kGpuPrecisionFp32 = 0, + kGpuPrecisionFp16, + kGpuPrecisionHybrid, + kGpuPrecisionUserProvided, +} + +/// Specifies the backend options for the GPU backend. +table QnnExecuTorchGpuBackendOptions { + /// kGpuPerfHintHigh - best inference latency at the expense of power consumption. + /// kGpuPerfHintNormal - balanced performance dependent upon power management. + /// kGpuPerfHintLow - lowest power consumption at the expense of inference latency. + performance_mode:QnnExecuTorchGpuPerformanceMode; + + /// kGpuPrecisionFp32 - best accuracy at the expense of performance. + /// kGpuPrecisionFp16 - best performance at the expense of accuracy. + /// kGpuPrecisionHybrid - good trade-off between performance and accuracy. + /// kGpuPrecisionUserProvided - backend will not optimize NATIVE tensor data types. + precision:QnnExecuTorchGpuPrecision; + + /// Backend will share NATIVE tensor memory based upon analysis of the network topology. + use_memory_optimizations:bool; + + /// Backend will fuse compatible operations into one operation to improve performance. + use_node_optimizations:bool; + + /// Backend will use queue recording to improve performance. 
+ use_queue_recording:bool; + + /// When multiple graphs appear inside the same context, + /// weights could be reused across all graphs. + use_weight_sharing:bool; +} + /// Defines performance modes available for HTP backend. enum QnnExecuTorchHtpPerformanceMode: int { kHtpDefault = 0, @@ -172,7 +216,6 @@ enum QnnExecuTorchOpPackagePlatform: int { AARCH64_ANDROID, } - table QnnExecuTorchOpPackageInfo { /// The name of the op package. op_package_name:string; @@ -197,7 +240,6 @@ table QnnExecuTorchOpPackageInfo { platform:QnnExecuTorchOpPackagePlatform; } - table QnnExecuTorchOpPackageOptions { /// An array of QnnExecuTorchOpPackageInfo structures. op_package_infos:[QnnExecuTorchOpPackageInfo]; @@ -210,6 +252,8 @@ table QnnExecuTorchBackendOptions { backend_type:QnnExecuTorchBackendType; htp_options:QnnExecuTorchHtpBackendOptions; + + gpu_options:QnnExecuTorchGpuBackendOptions; } table QnnExecuTorchOptions { diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index 4fe28e4e88a..80d62695211 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from enum import IntEnum, unique -from typing import List +from typing import List, Optional @dataclass @@ -85,6 +85,35 @@ class SocInfo: } +@unique +class QnnExecuTorchGpuPerformanceMode(IntEnum): + kGpuPerfHintHigh = 0 + kGpuPerfHintNormal = 1 + kGpuPerfHintLow = 2 + + +@unique +class QnnExecuTorchGpuPrecision(IntEnum): + kGpuPrecisionFp32 = 0 + kGpuPrecisionFp16 = 1 + kGpuPrecisionHybrid = 2 + kGpuPrecisionUserProvided = 3 + + +@dataclass +class QnnExecuTorchGpuBackendOptions: + performance_mode: QnnExecuTorchGpuPerformanceMode = ( + QnnExecuTorchGpuPerformanceMode.kGpuPerfHintHigh + ) + precision: QnnExecuTorchGpuPrecision = ( + QnnExecuTorchGpuPrecision.kGpuPrecisionUserProvided + ) + use_memory_optimizations: bool = True + use_node_optimizations: bool = True 
+ use_queue_recording: bool = True + use_weight_sharing: bool = False + + @unique class QnnExecuTorchHtpPerformanceMode(IntEnum): kHtpDefault = 0 @@ -155,7 +184,8 @@ class QnnExecuTorchProfileLevel(IntEnum): @dataclass class QnnExecuTorchBackendOptions: backend_type: QnnExecuTorchBackendType - htp_options: QnnExecuTorchHtpBackendOptions + htp_options: Optional[QnnExecuTorchHtpBackendOptions] = None + gpu_options: Optional[QnnExecuTorchGpuBackendOptions] = None @unique diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 8822db5f7c3..1648857049e 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -30,6 +30,7 @@ generate_context_binary, ModuleQConfig, prepare_pt2e, + QnnExecuTorchBackendType, QuantDtype, TestQNN, validate_context_binary, @@ -47,6 +48,7 @@ capture_program, dump_context_from_pte, from_context_binary, + generate_gpu_compiler_spec, generate_htp_compiler_spec, generate_qnn_executorch_compiler_spec, is_qnn_sdk_version_less_than, @@ -91,9 +93,16 @@ class TestQNNFloatingPointOperator(TestQNN): # TODO: refactor to support different backends def setUp(self): + match self.get_backend_type(): + case QnnExecuTorchBackendType.kHtpBackend: + backend_options = generate_htp_compiler_spec(use_fp16=True) + case QnnExecuTorchBackendType.kGpuBackend: + backend_options = generate_gpu_compiler_spec() + case _: + raise ValueError("Backend is not implemented yet") + TestQNN.atol = 1e-1 TestQNN.rtol = 1e-1 - backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, @@ -8646,6 +8655,13 @@ def setup_environment(): default="", type=str, ) + parser.add_argument( + "--backend", + help="Backend to be deployed ('htp'/'gpu' are currently supported).", + choices=["htp", "gpu"], + default="htp", + type=str, + ) 
parser.add_argument( "--llama_artifacts", help="A folder that contains: weight, tokenizer, and params.", @@ -8676,6 +8692,7 @@ def setup_environment(): TestQNN.llama_artifacts = args.llama_artifacts TestQNN.op_package_dir = args.op_package_dir TestQNN.target = args.target + TestQNN.backend = args.backend return sys.argv[:1] + ns_args diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index e846379962d..f7b4f25f7fc 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -17,7 +17,10 @@ from executorch.backends.qualcomm.builders.node_visitor import dq_ops from executorch.backends.qualcomm.qnn_preprocess import QnnBackend from executorch.backends.qualcomm.quantizer.quantizer import ModuleQConfig, QuantDtype -from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset +from executorch.backends.qualcomm.serialization.qc_schema import ( + QcomChipset, + QnnExecuTorchBackendType, +) from executorch.backends.qualcomm.utils.constants import ( QCOM_DTYPE, QCOM_PASS_ACTIVATE_KEY, @@ -165,6 +168,7 @@ class TestQNN(unittest.TestCase): op_package_dir: str = "" target: str = "" model_name: str = "" + backend: str = "" online_prepare: bool = False use_8a8w: str = "8a8w" use_16a16w: str = "16a16w" @@ -178,8 +182,6 @@ class TestQNN(unittest.TestCase): dump_intermediate_outputs: bool = False inference_speed: float = 0.0 inference_speed_output_path = "outputs/inference_speed.txt" - model_name: str = "" - oss_repo: str = "" def _assert_outputs_equal(self, model_output, ref_output): self.assertTrue(len(ref_output) == len(model_output)) @@ -216,6 +218,9 @@ def _save_model_and_expected_output( return ref_outputs, pte_fname + def get_backend_type(self): + return getattr(QnnExecuTorchBackendType, f"k{self.backend.title()}Backend") + def required_envs(self, conditions=None) -> bool: conditions = [] if conditions is None else conditions return all( @@ -416,6 +421,7 @@ def validate_intermediate_tensor(): 
dump_intermediate_outputs=( True if expected_intermediate_events != -1 else False ), + backend=self.get_backend_type(), expected_input_shape=( (tensor.shape for tensor in processed_inputs) if check_io_shape diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 3922fc217a1..1e952696cb9 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -34,6 +34,8 @@ QcomChipset, QnnExecuTorchBackendOptions, QnnExecuTorchBackendType, + QnnExecuTorchGpuBackendOptions, + QnnExecuTorchGpuPrecision, QnnExecuTorchHtpBackendOptions, QnnExecuTorchHtpPerformanceMode, QnnExecuTorchHtpPrecision, @@ -934,6 +936,47 @@ def draw_graph(title, path, graph_module: torch.fx.GraphModule): f.write(graph.get_dot_graph().create_svg()) +def generate_gpu_compiler_spec( + precision: QnnExecuTorchGpuPrecision = QnnExecuTorchGpuPrecision.kGpuPrecisionUserProvided, + use_memory_optimizations: bool = True, + use_node_optimizations: bool = True, + use_queue_recording: bool = True, + use_weight_sharing: bool = False, +) -> QnnExecuTorchBackendOptions: + """ + Helper function generating backend options for QNN GPU + + Args: + precision: + kGpuPrecisionFp32 - Sets the precision mode to floating point 32-bit (FP32). + kGpuPrecisionFp16 - Sets the precision mode to floating point 16-bit (FP16). + kGpuPrecisionHybrid - Sets the precision mode to FP16 for storage and FP32 for calculations. + kGpuPrecisionUserProvided - Uses the tensor data type provided by the user. + use_memory_optimizations: If true, backend will share NATIVE tensor memory + based upon analysis of the network topology. + use_node_optimizations: If true, backend will fuse compatible operations into + one operation to improve performance. + use_queue_recording: If true, backend will use queue recording to improve performance. + use_weight_sharing: Used with multiple_graphs, where model size will be + reduced when operations have the same weights across multiple graphs.
+ + Returns: + QnnExecuTorchBackendOptions: backend options for QNN GPU. + """ + # TODO: enable performance hint mechanism in runtime and make this as an option + gpu_options = QnnExecuTorchGpuBackendOptions() + gpu_options.precision = precision + gpu_options.use_memory_optimizations = use_memory_optimizations + gpu_options.use_node_optimizations = use_node_optimizations + gpu_options.use_queue_recording = use_queue_recording + gpu_options.use_weight_sharing = use_weight_sharing + + return QnnExecuTorchBackendOptions( + backend_type=QnnExecuTorchBackendType.kGpuBackend, + gpu_options=gpu_options, + ) + + def generate_htp_compiler_spec( use_fp16: bool, use_dlbc: bool = False, diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 920bad37ac4..ca6f63a87e3 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -31,6 +31,7 @@ ) from executorch.backends.qualcomm.serialization.qc_schema import ( QcomChipset, + QnnExecuTorchBackendType, QnnExecuTorchOpPackageOptions, ) from executorch.backends.qualcomm.utils.utils import ( @@ -83,6 +84,7 @@ def __init__( dump_intermediate_outputs=False, runner="examples/qualcomm/executor_runner/qnn_executor_runner", target="aarch64-android", + backend=QnnExecuTorchBackendType.kHtpBackend, expected_input_shape=None, expected_output_shape=None, ): @@ -103,6 +105,7 @@ def __init__( self.shared_buffer = shared_buffer self.runner = runner self.target = target + self.backend = backend self.expected_input_shape = expected_input_shape self.expected_output_shape = expected_output_shape self.extra_cmds = "" @@ -130,9 +133,9 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True): self._adb(["shell", f"rm -rf {self.workspace}"]) self._adb(["shell", f"mkdir -p {self.workspace}"]) - # necessary artifacts - artifacts = [ - *self.pte_path, + # necessary artifacts + artifacts = { + QnnExecuTorchBackendType.kHtpBackend: [ f"{self.qnn_sdk}/lib/{self.target}/libQnnHtp.so", (
f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/" @@ -143,11 +146,21 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True): f"libQnnHtpV{self.htp_arch}Stub.so" ), f"{self.qnn_sdk}/lib/{self.target}/libQnnHtpPrepare.so", + ], + QnnExecuTorchBackendType.kGpuBackend: [ + f"{self.qnn_sdk}/lib/{self.target}/libQnnGpu.so", + ], + }[self.backend] + + artifacts.extend( + [ + *self.pte_path, f"{self.qnn_sdk}/lib/{self.target}/libQnnSystem.so", f"{self.build_path}/{self.runner}", f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so", f"{self.qnn_sdk}/lib/{self.target}/libQnnModelDlc.so", ] + ) with tempfile.TemporaryDirectory() as tmp_dir: input_list_file, input_files = generate_inputs( tmp_dir, self.input_list_filename, inputs