diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md
index 1ba01a659c4..18024e0ea93 100644
--- a/backends/qualcomm/README.md
+++ b/backends/qualcomm/README.md
@@ -56,7 +56,7 @@ backends/qualcomm
 |   |       # Meanwhile, this is also the runtime responsbile for executing compiled
 |   |       # models on a device.
 |   └── backends # Backends supported by QNN.
-|       └── htpbackend
+|       └── gpu / htp
 |           ├── aarch64 # Configuration required to run on device. (Device Part).
 |           └── x86_64 # Configuration required to compile graph on host. (AoT Part).
 ├── scripts # Misc supporting scripts, not related to core functionality.
diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt
index 6a44f3234c5..9c43a6b0c2a 100644
--- a/backends/qualcomm/runtime/backends/CMakeLists.txt
+++ b/backends/qualcomm/runtime/backends/CMakeLists.txt
@@ -43,58 +43,70 @@ target_sources(
                        ${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp
 )
 
-# qnn_device
-set(HOST_ARCHITECTURE
-    ${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR}
+set(HOST_ARCHITECTURE_GPU
+    ${CMAKE_CURRENT_LIST_DIR}/gpu/${CMAKE_SYSTEM_PROCESSOR}
+)
+set(HOST_ARCHITECTURE_HTP
+    ${CMAKE_CURRENT_LIST_DIR}/htp/${CMAKE_SYSTEM_PROCESSOR}
 )
+set(HOST_ARCHITECTURE_IR ${CMAKE_CURRENT_LIST_DIR}/ir/${CMAKE_SYSTEM_PROCESSOR})
 
+# qnn_device
 target_sources(
   qnn_device
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.h
-         ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.h
+         ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuDevice.h
+         ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.cpp
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.cpp
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevicePlatformInfoConfig.h
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDeviceCustomConfig.h
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevicePlatformInfoConfig.h
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDeviceCustomConfig.h
           # When offline prepare context cache in x86 host we have to provide
           # platform infomation and SocModel to Qnn
-          ${HOST_ARCHITECTURE}/HtpDevicePlatformInfoConfig.cpp
-          ${HOST_ARCHITECTURE}/HtpDeviceCustomConfig.cpp
+          ${HOST_ARCHITECTURE_HTP}/HtpDevicePlatformInfoConfig.cpp
+          ${HOST_ARCHITECTURE_HTP}/HtpDeviceCustomConfig.cpp
 )
 
 # qnn_context
 target_sources(
   qnn_context
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h
-         ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h
-         ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h
-  PRIVATE
-    ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp
-    ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp
-    ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h
-    ${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp
-    ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp
+         ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.h
+         ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.h
+         ${CMAKE_CURRENT_LIST_DIR}/ir/IrContext.h
+  PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContextCustomConfig.h
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h
+          ${HOST_ARCHITECTURE_GPU}/GpuContextCustomConfig.cpp
+          ${HOST_ARCHITECTURE_HTP}/HtpContextCustomConfig.cpp
+          ${HOST_ARCHITECTURE_IR}/IrContext.cpp
 )
 
 # qnn_backend_cache
 target_sources(
   qnn_backend_cache
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.h
-         ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.h
+         ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.cpp
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.cpp
 )
 
 # qnn_graph
 target_sources(
   qnn_graph
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.h
-         ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.h
+         ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.h
+         ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.cpp
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.cpp
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.h
-          ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.cpp
-          ${HOST_ARCHITECTURE}/HtpGraphCustomConfig.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.h
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.h
+          ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.cpp
+          ${HOST_ARCHITECTURE_HTP}/HtpGraphCustomConfig.cpp
 )
 
 # qnn_op_package_manager
@@ -108,9 +120,13 @@ target_sources(
 target_sources(
   qnn_backend
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h
-         ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h
-         ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h
+         ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.h
+         ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackend.h
+         ${CMAKE_CURRENT_LIST_DIR}/ir/IrBackend.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.cpp
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.h
+          ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.cpp
 )
 
 # qnn_mem_manager
@@ -138,6 +154,5 @@ target_sources(
 target_sources(
   qnn_dlc_manager
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h
-  PRIVATE
-    ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp
+  PRIVATE ${HOST_ARCHITECTURE_IR}/QnnDlcManager.cpp
 )
diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
index e7e9db6fed8..10916d20532 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
+++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
@@ -53,8 +53,16 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
             EnumNameQnnExecuTorchHtpPdSession(htp_options->pd_session()));
         QNN_EXECUTORCH_LOG_INFO(
             "use_conv_hmx in htp_options: %d", htp_options->use_conv_hmx());
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_dlbc in htp_options: %d", htp_options->use_dlbc());
         QNN_EXECUTORCH_LOG_INFO(
             "use_fold_relu in htp_options: %d", htp_options->use_fold_relu());
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_multi_contexts in htp_options: %d",
+            htp_options->use_multi_contexts());
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_weight_sharing in htp_options: %d",
+            htp_options->use_weight_sharing());
       }
       backend_params->qnn_backend_ptr_ =
           std::make_unique<HtpBackend>(implementation, logger);
@@ -86,13 +94,66 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
           get_option(options->log_level()));
       backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
     } break;
-    case QnnExecuTorchBackendType::kGpuBackend:
+    case QnnExecuTorchBackendType::kGpuBackend: {
+      auto gpu_options = options->backend_options()->gpu_options();
+      if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
+        QNN_EXECUTORCH_LOG_INFO(
+            "performance_mode in gpu_options: %s",
+            EnumNameQnnExecuTorchGpuPerformanceMode(
+                gpu_options->performance_mode()));
+        QNN_EXECUTORCH_LOG_INFO(
+            "precision in gpu_options: %s",
+            EnumNameQnnExecuTorchGpuPrecision(gpu_options->precision()));
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_memory_optimizations in gpu_options: %d",
+            gpu_options->use_memory_optimizations());
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_node_optimizations in gpu_options: %d",
+            gpu_options->use_node_optimizations());
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_queue_recording in gpu_options: %d",
+            gpu_options->use_queue_recording());
+        QNN_EXECUTORCH_LOG_INFO(
+            "use_weight_sharing in gpu_options: %d",
+            gpu_options->use_weight_sharing());
+      }
+      backend_params->qnn_backend_ptr_ =
+          std::make_unique<GpuBackend>(implementation, logger, gpu_options);
+
+      backend_params->qnn_device_ptr_ =
+          std::make_unique<GpuDevice>(implementation, logger);
+
+      backend_params->qnn_backend_cache_ptr_ =
+          std::make_unique<QnnBackendCache>(qnn_context_blob);
+
+      backend_params->qnn_context_ptr_ = std::make_unique<GpuContext>(
+          implementation,
+          backend_params->qnn_backend_ptr_.get(),
+          backend_params->qnn_device_ptr_.get(),
+          backend_params->qnn_backend_cache_ptr_.get(),
+          qnn_dlc_manager,
+          gpu_options);
+
+      backend_params->qnn_graph_ptr_ = std::make_unique<GpuGraph>(
+          implementation,
+          backend_params->qnn_backend_ptr_.get(),
+          backend_params->qnn_context_ptr_.get(),
+          options->profile_level(),
+          gpu_options);
+    } break;
     case QnnExecuTorchBackendType::kDspBackend:
     case QnnExecuTorchBackendType::kUndefinedBackend:
     default:
       return nullptr;
   }
 
+  backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
+      implementation,
+      backend_params->qnn_context_ptr_.get(),
+      options->log_level());
+
+  backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
+
   if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) {
     return backend_params;
   }
diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h
index 3d78a36b9f0..c6c112ccf2c 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h
+++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.h
@@ -17,11 +17,15 @@
 #include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnLogger.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnMemManager.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h>
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackend.h>
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuDevice.h>
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuGraph.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackend.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevice.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraph.h>
 
 #include <memory>
 namespace executorch {
diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h
index 85de00f8623..f0f1b5b0fbd 100644
--- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h
+++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h
@@ -29,7 +29,7 @@ class QnnDevice {
     return handle_;
   }
 
-  executorch::runtime::Error Configure();
+  virtual executorch::runtime::Error Configure();
 
  protected:
   virtual executorch::runtime::Error MakeConfig(
diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h
index a57906df4e3..940c73e518a 100644
--- a/backends/qualcomm/runtime/backends/QnnDlcManager.h
+++ b/backends/qualcomm/runtime/backends/QnnDlcManager.h
@@ -10,7 +10,7 @@
 
 #include <QnnTypes.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
-#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/ir/IrContext.h>
 
 #include "QnnWrapperUtils.hpp"
 namespace executorch {
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp b/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp
new file mode 100644
index 00000000000..2332193d30d
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackend.h>
+
+#include "GPU/QnnGpuCommon.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+// Constructs the GPU backend on top of the common QnnBackend and creates the
+// helper that translates `gpu_options` into QNN GPU backend custom configs.
+GpuBackend::GpuBackend(
+    const QnnImplementation& implementation,
+    QnnLogger* logger,
+    const QnnExecuTorchGpuBackendOptions* gpu_options)
+    : QnnBackend(implementation, logger),
+      gpu_backend_custom_config_(
+          std::make_unique<GpuBackendCustomConfig>(gpu_options)) {}
+
+// Returns the QNN GPU API version (major/minor/patch) taken from the
+// QNN_GPU_API_VERSION_* macros.
+Qnn_Version_t GpuBackend::GetExpectedBackendVersion() const {
+  Qnn_Version_t backend_version;
+  backend_version.major = QNN_GPU_API_VERSION_MAJOR;
+  backend_version.minor = QNN_GPU_API_VERSION_MINOR;
+  backend_version.patch = QNN_GPU_API_VERSION_PATCH;
+  return backend_version;
+}
+
+// Returns true only when `event_type` is QNN_PROFILE_EVENTTYPE_EXECUTE.
+bool GpuBackend::IsProfileEventTypeParentOfNodeTime(
+    QnnProfile_EventType_t event_type) {
+  return event_type == QNN_PROFILE_EVENTTYPE_EXECUTE;
+}
+
+// Appends one QNN_BACKEND_CONFIG_OPTION_CUSTOM entry per GPU custom config to
+// `config`, followed by a terminating nullptr. The appended pointers refer to
+// elements of `backend_config_`, so they stay valid only while this object is
+// alive and MakeConfig() is not called again.
+Error GpuBackend::MakeConfig(std::vector<const QnnBackend_Config_t*>& config) {
+  const std::vector<QnnBackend_CustomConfig_t> backend_custom_config =
+      gpu_backend_custom_config_->CreateBackendCustomConfig();
+
+  // Keep the count as std::size_t so it matches the loop index below and is
+  // not narrowed to 32 bits.
+  const std::size_t num_custom_configs = backend_custom_config.size();
+  backend_config_.resize(num_custom_configs);
+  // +1 for the null terminator
+  config.reserve(config.size() + num_custom_configs + 1);
+
+  for (std::size_t i = 0; i < num_custom_configs; ++i) {
+    backend_config_[i].option = QNN_BACKEND_CONFIG_OPTION_CUSTOM;
+    backend_config_[i].customConfig = backend_custom_config[i];
+    config.push_back(&backend_config_[i]);
+  }
+
+  config.push_back(nullptr);
+  return Error::Ok;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackend.h b/backends/qualcomm/runtime/backends/gpu/GpuBackend.h
new file mode 100644
index 00000000000..f0a2de2fc8c
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuBackend.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+// QnnBackend specialization for the QNN GPU backend. Supplies the expected GPU
+// API version and the backend custom configs derived from the GPU options.
+class GpuBackend : public QnnBackend {
+ public:
+  // `gpu_options` is forwarded to GpuBackendCustomConfig.
+  GpuBackend(
+      const QnnImplementation& implementation,
+      QnnLogger* logger,
+      const QnnExecuTorchGpuBackendOptions* gpu_options);
+
+  Qnn_Version_t GetExpectedBackendVersion() const override;
+
+  bool IsProfileEventTypeParentOfNodeTime(
+      QnnProfile_EventType_t event_type) override;
+
+ protected:
+  // Fills `config` with the backend configuration entries.
+  executorch::runtime::Error MakeConfig(
+      std::vector<const QnnBackend_Config_t*>& config) override;
+
+ private:
+  // Storage for the entries whose addresses MakeConfig() hands out.
+  std::vector<QnnBackend_Config_t> backend_config_;
+  // Builds the GPU-specific custom configs from the backend options.
+  std::unique_ptr<GpuBackendCustomConfig> gpu_backend_custom_config_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp
new file mode 100644
index 00000000000..60e289493d0
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+// Stores a non-owning pointer to the GPU options; it is dereferenced later by
+// CreateBackendCustomConfig().
+GpuBackendCustomConfig::GpuBackendCustomConfig(
+    const QnnExecuTorchGpuBackendOptions* gpu_options)
+    : gpu_options_(gpu_options) {}
+
+// Allocates a custom config owned by `gpu_backend_config_`, marks its option
+// as undefined and returns a non-owning pointer to it.
+QnnGpuBackend_CustomConfig_t*
+GpuBackendCustomConfig::AllocBackendCustomConfig() {
+  gpu_backend_config_.push_back(
+      std::make_unique<QnnGpuBackend_CustomConfig_t>());
+  QnnGpuBackend_CustomConfig_t* new_config = gpu_backend_config_.back().get();
+  new_config->option = QNN_GPU_BACKEND_CONFIG_OPTION_UNDEFINED;
+  return new_config;
+}
+
+// Returns the backend custom configs implied by the GPU options: a single
+// weight-sharing entry when use_weight_sharing() is set, otherwise nothing.
+std::vector<QnnBackend_CustomConfig_t>
+GpuBackendCustomConfig::CreateBackendCustomConfig() {
+  std::vector<QnnBackend_CustomConfig_t> custom_configs;
+  if (!gpu_options_->use_weight_sharing()) {
+    return custom_configs;
+  }
+
+  QnnGpuBackend_CustomConfig_t* weight_sharing = AllocBackendCustomConfig();
+  weight_sharing->option =
+      QNN_GPU_BACKEND_CONFIG_OPTION_WEIGHT_SHARING_ENABLED;
+  weight_sharing->weightSharingEnabled = 1;
+  custom_configs.push_back(
+      static_cast<QnnBackend_CustomConfig_t>(weight_sharing));
+  return custom_configs;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h
new file mode 100644
index 00000000000..150235a82e6
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
+
+#include <memory>
+#include <vector>
+
+#include "GPU/QnnGpuBackend.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using namespace qnn_delegate;
+
+// Translates QnnExecuTorchGpuBackendOptions into QNN GPU backend custom
+// configs and owns the storage backing the returned config pointers.
+class GpuBackendCustomConfig {
+ public:
+  explicit GpuBackendCustomConfig(
+      const QnnExecuTorchGpuBackendOptions* gpu_options);
+
+  // Builds the list of backend custom configs; the entries point at structs
+  // owned by this object.
+  std::vector<QnnBackend_CustomConfig_t> CreateBackendCustomConfig();
+
+ private:
+  // Allocates one config struct owned by `gpu_backend_config_`.
+  QnnGpuBackend_CustomConfig_t* AllocBackendCustomConfig();
+  std::vector<std::unique_ptr<QnnGpuBackend_CustomConfig_t>>
+      gpu_backend_config_;
+  // Non-owning; supplied by the caller at construction.
+  const QnnExecuTorchGpuBackendOptions* gpu_options_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp
new file mode 100644
index 00000000000..d3816fc560e
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContext.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+// Constructs the GPU context on top of the common QnnContext and creates the
+// helper that translates `gpu_options` into QNN GPU context custom configs.
+GpuContext::GpuContext(
+    const QnnImplementation& implementation,
+    QnnBackend* backend,
+    QnnDevice* device,
+    QnnBackendCache* cache,
+    QnnDlcManager* qnn_dlc_manager,
+    const QnnExecuTorchGpuBackendOptions* gpu_options)
+    : QnnContext(implementation, backend, device, cache, qnn_dlc_manager),
+      gpu_context_custom_config_(
+          std::make_unique<GpuContextCustomConfig>(gpu_options)) {}
+
+// Appends one QNN_CONTEXT_CONFIG_OPTION_CUSTOM entry per GPU custom config to
+// `config`, followed by a terminating nullptr. The appended pointers refer to
+// elements of `context_config_`, so they stay valid only while this object is
+// alive and MakeConfig() is not called again.
+Error GpuContext::MakeConfig(std::vector<const QnnContext_Config_t*>& config) {
+  const std::vector<QnnContext_CustomConfig_t> context_custom_config =
+      gpu_context_custom_config_->CreateContextCustomConfig();
+
+  // Keep the count as std::size_t so it matches the loop index below and is
+  // not narrowed to 32 bits.
+  const std::size_t num_custom_configs = context_custom_config.size();
+  context_config_.resize(num_custom_configs);
+  // +1 for the null terminator
+  config.reserve(config.size() + num_custom_configs + 1);
+
+  for (std::size_t i = 0; i < num_custom_configs; ++i) {
+    context_config_[i].option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM;
+    context_config_[i].customConfig = context_custom_config[i];
+    config.push_back(&context_config_[i]);
+  }
+
+  config.push_back(nullptr);
+  return Error::Ok;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.h b/backends/qualcomm/runtime/backends/gpu/GpuContext.h
new file mode 100644
index 00000000000..873117c0e50
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+class QnnDlcManager;
+// QnnContext specialization for the QNN GPU backend. Supplies the context
+// custom configs derived from the GPU options.
+class GpuContext : public QnnContext {
+ public:
+  // `gpu_options` is forwarded to GpuContextCustomConfig; the remaining
+  // arguments are passed through to QnnContext.
+  GpuContext(
+      const QnnImplementation& implementation,
+      QnnBackend* backend,
+      QnnDevice* device,
+      QnnBackendCache* cache,
+      QnnDlcManager* qnn_dlc_manager,
+      const QnnExecuTorchGpuBackendOptions* gpu_options);
+
+ protected:
+  // Fills `config` with the context configuration entries.
+  executorch::runtime::Error MakeConfig(
+      std::vector<const QnnContext_Config_t*>& config) override;
+
+ private:
+  // Storage for the entries whose addresses MakeConfig() hands out.
+  std::vector<QnnContext_Config_t> context_config_;
+  // Builds the GPU-specific custom configs from the backend options.
+  std::unique_ptr<GpuContextCustomConfig> gpu_context_custom_config_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h
new file mode 100644
index 00000000000..8a1f635bee0
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
+
+#include <memory>
+#include <vector>
+
+#include "GPU/QnnGpuContext.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using namespace qnn_delegate;
+
+// Translates QnnExecuTorchGpuBackendOptions into QNN GPU context custom
+// configs and owns the storage backing them. CreateContextCustomConfig() is
+// defined separately per target architecture (aarch64 / x86_64).
+class GpuContextCustomConfig {
+ public:
+  explicit GpuContextCustomConfig(
+      const QnnExecuTorchGpuBackendOptions* gpu_options)
+      : gpu_options_(gpu_options) {}
+
+  // Builds the list of context custom configs for the current target.
+  std::vector<QnnContext_CustomConfig_t> CreateContextCustomConfig();
+
+ private:
+  // Allocates a custom config owned by `gpu_context_config_`, marks its
+  // option as undefined and returns a non-owning pointer to it.
+  QnnGpuContext_CustomConfig_t* AllocContextCustomConfig() {
+    gpu_context_config_.push_back(
+        std::make_unique<QnnGpuContext_CustomConfig_t>());
+    QnnGpuContext_CustomConfig_t* new_config =
+        gpu_context_config_.back().get();
+    new_config->option = QNN_GPU_CONTEXT_CONFIG_OPTION_UNDEFINED;
+    return new_config;
+  }
+  std::vector<std::unique_ptr<QnnGpuContext_CustomConfig_t>>
+      gpu_context_config_;
+  // Not read by the x86_64 implementation, hence [[maybe_unused]].
+  [[maybe_unused]] const QnnExecuTorchGpuBackendOptions* gpu_options_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuDevice.h b/backends/qualcomm/runtime/backends/gpu/GpuDevice.h
new file mode 100644
index 00000000000..20d6568ecc3
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuDevice.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+// QnnDevice specialization for the QNN GPU backend.
+class GpuDevice : public QnnDevice {
+ public:
+  GpuDevice(const QnnImplementation& implementation, QnnLogger* logger)
+      : QnnDevice(implementation, logger) {}
+
+  // GPU backend does not support device creation, so Configure() is a no-op
+  // that reports success.
+  executorch::runtime::Error Configure() override {
+    return executorch::runtime::Error::Ok;
+  }
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp b/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp
new file mode 100644
index 00000000000..d626ac47c7d
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuGraph.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+// Constructs the GPU graph on top of the common QnnGraph and creates the
+// helper that translates `gpu_options` into QNN GPU graph custom configs.
+GpuGraph::GpuGraph(
+    const QnnImplementation& implementation,
+    QnnBackend* backend,
+    QnnContext* context,
+    const QnnExecuTorchProfileLevel& profile_level,
+    const QnnExecuTorchGpuBackendOptions* gpu_options)
+    : QnnGraph(implementation, backend, context, profile_level),
+      gpu_graph_custom_config_(
+          std::make_unique<GpuGraphCustomConfig>(gpu_options)) {}
+
+// Appends one QNN_GRAPH_CONFIG_OPTION_CUSTOM entry per GPU custom config to
+// `config`, followed by a terminating nullptr. The appended pointers refer to
+// elements of `graph_config_`, so they stay valid only while this object is
+// alive and MakeConfig() is not called again.
+Error GpuGraph::MakeConfig(std::vector<const QnnGraph_Config_t*>& config) {
+  const std::vector<QnnGraph_CustomConfig_t> graph_custom_config =
+      gpu_graph_custom_config_->CreateGraphCustomConfig();
+
+  // Keep the count as std::size_t so it matches the loop index below and is
+  // not narrowed to 32 bits.
+  const std::size_t num_custom_configs = graph_custom_config.size();
+  graph_config_.resize(num_custom_configs);
+  // +1 for the null terminator
+  config.reserve(config.size() + num_custom_configs + 1);
+
+  for (std::size_t i = 0; i < num_custom_configs; ++i) {
+    graph_config_[i].option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+    graph_config_[i].customConfig = graph_custom_config[i];
+    config.push_back(&graph_config_[i]);
+  }
+
+  config.push_back(nullptr);
+  return Error::Ok;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraph.h b/backends/qualcomm/runtime/backends/gpu/GpuGraph.h
new file mode 100644
index 00000000000..c2b5bf2832d
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuGraph.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+// QnnGraph specialization for the QNN GPU backend. Supplies the graph custom
+// configs derived from the GPU options.
+class GpuGraph : public QnnGraph {
+ public:
+  // `gpu_options` is forwarded to GpuGraphCustomConfig; the remaining
+  // arguments are passed through to QnnGraph.
+  GpuGraph(
+      const QnnImplementation& implementation,
+      QnnBackend* backend,
+      QnnContext* context,
+      const QnnExecuTorchProfileLevel& profile_level,
+      const QnnExecuTorchGpuBackendOptions* gpu_options);
+
+ protected:
+  // Fills `config` with the graph configuration entries.
+  executorch::runtime::Error MakeConfig(
+      std::vector<const QnnGraph_Config_t*>& config) override;
+
+ private:
+  // Storage for the entries whose addresses MakeConfig() hands out.
+  std::vector<QnnGraph_Config_t> graph_config_;
+  // Builds the GPU-specific custom configs from the backend options.
+  std::unique_ptr<GpuGraphCustomConfig> gpu_graph_custom_config_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp
new file mode 100644
index 00000000000..17f094db805
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+// Stores a non-owning pointer to the GPU options; it is dereferenced later by
+// CreateGraphCustomConfig().
+GpuGraphCustomConfig::GpuGraphCustomConfig(
+    const QnnExecuTorchGpuBackendOptions* gpu_options)
+    : gpu_options_(gpu_options) {}
+
+// Allocates a custom config owned by `gpu_graph_config_` and returns a
+// non-owning pointer to it.
+QnnGpuGraph_CustomConfig_t* GpuGraphCustomConfig::AllocGraphCustomConfig() {
+  gpu_graph_config_.push_back(std::make_unique<QnnGpuGraph_CustomConfig_t>());
+  return gpu_graph_config_.back().get();
+}
+
+// Returns a single graph custom config carrying the precision and the
+// memory-optimization, node-optimization and queue-recording switches from
+// the GPU options.
+std::vector<QnnGraph_CustomConfig_t>
+GpuGraphCustomConfig::CreateGraphCustomConfig() {
+  QnnGpuGraph_CustomConfig_t* graph_options = AllocGraphCustomConfig();
+  graph_options->precision =
+      static_cast<QnnGpu_Precision_t>(gpu_options_->precision());
+  // The QNN fields are "disable" flags, so each "use_*" option is negated.
+  graph_options->disableMemoryOptimizations =
+      !gpu_options_->use_memory_optimizations();
+  graph_options->disableNodeOptimizations =
+      !gpu_options_->use_node_optimizations();
+  graph_options->disableQueueRecording = !gpu_options_->use_queue_recording();
+
+  std::vector<QnnGraph_CustomConfig_t> custom_configs;
+  custom_configs.push_back(
+      static_cast<QnnGraph_CustomConfig_t>(graph_options));
+  return custom_configs;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h
new file mode 100644
index 00000000000..a47cd1a3345
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnGraphCommon.h>
+
+#include <memory>
+#include <vector>
+
+#include "GPU/QnnGpuGraph.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using namespace qnn_delegate;
+
+// Translates QnnExecuTorchGpuBackendOptions into QNN GPU graph custom configs
+// and owns the storage backing the returned config pointers.
+class GpuGraphCustomConfig {
+ public:
+  explicit GpuGraphCustomConfig(
+      const QnnExecuTorchGpuBackendOptions* gpu_options);
+
+  // Builds the list of graph custom configs; the entries point at structs
+  // owned by this object.
+  std::vector<QnnGraph_CustomConfig_t> CreateGraphCustomConfig();
+
+ private:
+  // Allocates one config struct owned by `gpu_graph_config_`.
+  QnnGpuGraph_CustomConfig_t* AllocGraphCustomConfig();
+  std::vector<std::unique_ptr<QnnGpuGraph_CustomConfig_t>> gpu_graph_config_;
+  // Non-owning; supplied by the caller at construction.
+  const QnnExecuTorchGpuBackendOptions* gpu_options_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp
new file mode 100644
index 00000000000..b4f200897ba
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+std::vector<QnnContext_CustomConfig_t>
+GpuContextCustomConfig::CreateContextCustomConfig() {
+  // Single entry: the GPU perf hint taken from the backend options.
+  QnnGpuContext_CustomConfig_t* perf_hint_config = AllocContextCustomConfig();
+  perf_hint_config->option = QNN_GPU_CONTEXT_CONFIG_OPTION_PERF_HINT;
+  perf_hint_config->perfHint =
+      static_cast<QnnGpuContext_PerfHint_t>(gpu_options_->performance_mode());
+
+  std::vector<QnnContext_CustomConfig_t> configs;
+  configs.push_back(static_cast<QnnContext_CustomConfig_t>(perf_hint_config));
+  return configs;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp
new file mode 100644
index 00000000000..69784c1797f
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+std::vector<QnnContext_CustomConfig_t>
+GpuContextCustomConfig::CreateContextCustomConfig() {
+  return std::vector<QnnContext_CustomConfig_t>();  // always empty on x86_64
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h b/backends/qualcomm/runtime/backends/htp/HtpBackend.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h
rename to backends/qualcomm/runtime/backends/htp/HtpBackend.h
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp
similarity index 96%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp
rename to backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp
index 030b5666daf..3038a100d03 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp
+++ b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp
@@ -5,7 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h>
 #include "HTP/QnnHtpSystemContext.h"
 
 namespace executorch {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h
rename to backends/qualcomm/runtime/backends/htp/HtpBackendCache.h
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp b/backends/qualcomm/runtime/backends/htp/HtpContext.cpp
similarity index 94%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp
rename to backends/qualcomm/runtime/backends/htp/HtpContext.cpp
index 50d299b55e9..0056a2c0917 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp
+++ b/backends/qualcomm/runtime/backends/htp/HtpContext.cpp
@@ -7,7 +7,7 @@
  */
 
 #include <executorch/backends/qualcomm/runtime/Logging.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContext.h>
 
 #include "HTP/QnnHtpCommon.h"
 
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htp/HtpContext.h
similarity index 94%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpContext.h
rename to backends/qualcomm/runtime/backends/htp/HtpContext.h
index 88660db080a..ff937593434 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h
+++ b/backends/qualcomm/runtime/backends/htp/HtpContext.h
@@ -10,7 +10,7 @@
 
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h
rename to backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp b/backends/qualcomm/runtime/backends/htp/HtpDevice.cpp
similarity index 99%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp
rename to backends/qualcomm/runtime/backends/htp/HtpDevice.cpp
index 35a20048fc5..75809383ccd 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp
+++ b/backends/qualcomm/runtime/backends/htp/HtpDevice.cpp
@@ -7,7 +7,7 @@
  */
 
 #include <executorch/backends/qualcomm/runtime/Logging.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevice.h>
 
 #include "HTP/QnnHtpCommon.h"
 #include "Saver/QnnSaverCommon.h"
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h b/backends/qualcomm/runtime/backends/htp/HtpDevice.h
similarity index 92%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h
rename to backends/qualcomm/runtime/backends/htp/HtpDevice.h
index 9052deb6b52..82eb8aab9ab 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h
+++ b/backends/qualcomm/runtime/backends/htp/HtpDevice.h
@@ -9,8 +9,8 @@
 
 #include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h>
 #include <memory>
 
 #include "HTP/QnnHtpDevice.h"
@@ -38,7 +38,7 @@ class HtpDevice : public QnnDevice {
   }
   ~HtpDevice();
 
-  // Defines Qnn performance mode vote types for htpbackend
+  // Defines Qnn performance mode vote types for htp
   enum PerformanceModeVoteType {
     kNoVote = 0,
     kUpVote = 1,
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h
rename to backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h b/backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h
rename to backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp b/backends/qualcomm/runtime/backends/htp/HtpGraph.cpp
similarity index 93%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp
rename to backends/qualcomm/runtime/backends/htp/HtpGraph.cpp
index 29dcf0a58c3..6208febe61a 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp
+++ b/backends/qualcomm/runtime/backends/htp/HtpGraph.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraph.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h b/backends/qualcomm/runtime/backends/htp/HtpGraph.h
similarity index 93%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h
rename to backends/qualcomm/runtime/backends/htp/HtpGraph.h
index c3add50d08b..db24a64cdfd 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h
+++ b/backends/qualcomm/runtime/backends/htp/HtpGraph.h
@@ -8,7 +8,7 @@
 #pragma once
 
 #include <executorch/backends/qualcomm/runtime/backends/QnnGraphCommon.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h>
 
 #include <memory>
 
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp
similarity index 97%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp
index d43f8320285..17b8438880d 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp
@@ -7,7 +7,7 @@
  */
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendCache.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h
rename to backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h
diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp
similarity index 87%
rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp
index 04a5d844dd0..676795797f8 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp
@@ -6,8 +6,8 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp
similarity index 84%
rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp
index 81ac4a14372..8207f5071ba 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp
@@ -5,7 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp
similarity index 83%
rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp
index c191791fa63..91221a78fd6 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp
@@ -5,7 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp
similarity index 85%
rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp
index 096fda7b059..faac23edc12 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp
similarity index 90%
rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp
index 1fc2940eaa7..4850afa14a2 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp
similarity index 90%
rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp
index 154433c10b0..9afbf489bc1 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp
@@ -5,7 +5,7 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp
similarity index 96%
rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp
index b025f0b2aa6..15c677e8a68 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 #include <executorch/backends/qualcomm/runtime/Logging.h>
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp
similarity index 85%
rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp
rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp
index 330ca43e20b..ec01f2bbfdd 100644
--- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp
+++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h>
+#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/ir/IrBackend.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/irbackend/IrBackend.h
rename to backends/qualcomm/runtime/backends/ir/IrBackend.h
diff --git a/backends/qualcomm/runtime/backends/irbackend/IrContext.h b/backends/qualcomm/runtime/backends/ir/IrContext.h
similarity index 100%
rename from backends/qualcomm/runtime/backends/irbackend/IrContext.h
rename to backends/qualcomm/runtime/backends/ir/IrContext.h
diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp b/backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp
similarity index 88%
rename from backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp
rename to backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp
index 44ce8de8f46..12a27b19ccd 100644
--- a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp
+++ b/backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp
@@ -9,7 +9,7 @@
 #include <fstream>
 
 #include <executorch/backends/qualcomm/runtime/Logging.h>
-#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/ir/IrContext.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/aarch64/QnnDlcManager.cpp
similarity index 100%
rename from backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp
rename to backends/qualcomm/runtime/backends/ir/aarch64/QnnDlcManager.cpp
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp b/backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp
similarity index 94%
rename from backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp
rename to backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp
index f167aae9319..cf5df3de8e9 100644
--- a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp
+++ b/backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp
@@ -9,7 +9,7 @@
 #include <fstream>
 
 #include <executorch/backends/qualcomm/runtime/Logging.h>
-#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
+#include <executorch/backends/qualcomm/runtime/backends/ir/IrContext.h>
 namespace executorch {
 namespace backends {
 namespace qnn {
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp
similarity index 98%
rename from backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
rename to backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp
index 280751cf160..7190dba0236 100644
--- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
+++ b/backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp
@@ -7,7 +7,7 @@
  */
 #include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>
-#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrBackend.h>
+#include <executorch/backends/qualcomm/runtime/backends/ir/IrBackend.h>
 
 namespace executorch {
 namespace backends {
diff --git a/backends/qualcomm/runtime/targets.bzl b/backends/qualcomm/runtime/targets.bzl
index db3706ba221..85cece2bae7 100644
--- a/backends/qualcomm/runtime/targets.bzl
+++ b/backends/qualcomm/runtime/targets.bzl
@@ -44,10 +44,12 @@ def define_common_targets():
                 [
                     "*.cpp",
                     "backends/*.cpp",
-                    "backends/irbackend/*.cpp",
-                    "backends/htpbackend/*.cpp",
-                ] + (["backends/htpbackend/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/htpbackend/aarch64/*.cpp"]) + (
-                    ["backends/irbackend/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/irbackend/aarch64/*.cpp"]
+                    "backends/gpu/*.cpp",
+                    "backends/htp/*.cpp",
+                    "backends/ir/*.cpp",
+                ] + (["backends/gpu/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/gpu/aarch64/*.cpp"]) + (
+                    ["backends/htp/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/htp/aarch64/*.cpp"]) + (
+                    ["backends/ir/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/ir/aarch64/*.cpp"]
                 ),
                 exclude = ["Logging.cpp"],
             ),
@@ -55,8 +57,9 @@ def define_common_targets():
                 [
                     "*.h",
                     "backends/*.h",
-                    "backends/irbackend/*.h",
-                    "backends/htpbackend/*.h",
+                    "backends/gpu/*.h",
+                    "backends/htp/*.h",
+                    "backends/ir/*.h",
                 ],
                 exclude = ["Logging.h"],
             ),
diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs
index 9a24b95d261..548b2e5e80e 100644
--- a/backends/qualcomm/serialization/qc_compiler_spec.fbs
+++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs
@@ -61,6 +61,50 @@ table SocInfo {
   htp_info:HtpInfo;
 }
 
+/// GPU performance modes; cast as-is to QnnGpuContext_PerfHint_t by the runtime.
+enum QnnExecuTorchGpuPerformanceMode: int {
+  kGpuPerfHintHigh = 0,
+  kGpuPerfHintNormal,
+  kGpuPerfHintLow,
+}
+
+/// Defines the optimization levels of the graph tensors that are neither input
+/// nor output tensors. This enum controls the trade-off between performance
+/// and accuracy.
+enum QnnExecuTorchGpuPrecision: int {
+  kGpuPrecisionFp32 = 0,
+  kGpuPrecisionFp16,
+  kGpuPrecisionHybrid,
+  kGpuPrecisionUserProvided,
+}
+
+/// Backend options for the GPU backend (Python mirror lives in qc_schema.py).
+table QnnExecuTorchGpuBackendOptions {
+  /// kGpuPerfHintHigh - best inference latency at the expense of power consumption.
+  /// kGpuPerfHintNormal - balanced performance dependent upon power management.
+  /// kGpuPerfHintLow - lowest power consumption at the expense of inference latency.
+  performance_mode:QnnExecuTorchGpuPerformanceMode;
+
+  /// kGpuPrecisionFp32 - best accuracy at the expense of performance.
+  /// kGpuPrecisionFp16 - best performance at the expense of accuracy.
+  /// kGpuPrecisionHybrid - good trade-off between performance and accuracy.
+  /// kGpuPrecisionUserProvided - backend will not optimize NATIVE tensor data types.
+  precision:QnnExecuTorchGpuPrecision;
+
+  /// Backend will share NATIVE tensor memory based upon analysis of the network topology.
+  use_memory_optimizations:bool;
+
+  /// Backend will fuse compatible operations into one operation to improve performance.
+  use_node_optimizations:bool;
+
+  /// Backend will use queue recording to improve performance.
+  use_queue_recording:bool;
+
+  /// When multiple graphs appear inside the same context,
+  /// weights can be reused across all graphs.
+  use_weight_sharing:bool;
+}
+
 /// Defines performance modes available for HTP backend.
 enum QnnExecuTorchHtpPerformanceMode: int {
   kHtpDefault = 0,
@@ -172,7 +216,6 @@ enum QnnExecuTorchOpPackagePlatform: int {
   AARCH64_ANDROID,
 }
 
-
 table QnnExecuTorchOpPackageInfo {
   /// The name of the op package.
   op_package_name:string;
@@ -197,7 +240,6 @@ table QnnExecuTorchOpPackageInfo {
   platform:QnnExecuTorchOpPackagePlatform;
 }
 
-
 table QnnExecuTorchOpPackageOptions {
   /// An array of QnnExecuTorchOpPackageInfo structures.
   op_package_infos:[QnnExecuTorchOpPackageInfo];
@@ -210,6 +252,8 @@ table QnnExecuTorchBackendOptions {
   backend_type:QnnExecuTorchBackendType;
 
   htp_options:QnnExecuTorchHtpBackendOptions;
+
+  gpu_options:QnnExecuTorchGpuBackendOptions;
 }
 
 table QnnExecuTorchOptions {
diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py
index 4fe28e4e88a..80d62695211 100644
--- a/backends/qualcomm/serialization/qc_schema.py
+++ b/backends/qualcomm/serialization/qc_schema.py
@@ -10,7 +10,7 @@
 
 from dataclasses import dataclass, field
 from enum import IntEnum, unique
-from typing import List
+from typing import List, Optional
 
 
 @dataclass
@@ -85,6 +85,35 @@ class SocInfo:
 }
 
 
+@unique
+class QnnExecuTorchGpuPerformanceMode(IntEnum):  # mirrors qc_compiler_spec.fbs
+    kGpuPerfHintHigh = 0
+    kGpuPerfHintNormal = 1
+    kGpuPerfHintLow = 2
+
+
+@unique
+class QnnExecuTorchGpuPrecision(IntEnum):  # mirrors qc_compiler_spec.fbs
+    kGpuPrecisionFp32 = 0
+    kGpuPrecisionFp16 = 1
+    kGpuPrecisionHybrid = 2
+    kGpuPrecisionUserProvided = 3
+
+
+@dataclass
+class QnnExecuTorchGpuBackendOptions:  # mirrors the table in qc_compiler_spec.fbs
+    performance_mode: QnnExecuTorchGpuPerformanceMode = (
+        QnnExecuTorchGpuPerformanceMode.kGpuPerfHintHigh
+    )
+    precision: QnnExecuTorchGpuPrecision = (
+        QnnExecuTorchGpuPrecision.kGpuPrecisionUserProvided
+    )
+    use_memory_optimizations: bool = True
+    use_node_optimizations: bool = True
+    use_queue_recording: bool = True
+    use_weight_sharing: bool = False
+
+
 @unique
 class QnnExecuTorchHtpPerformanceMode(IntEnum):
     kHtpDefault = 0
@@ -155,7 +184,8 @@ class QnnExecuTorchProfileLevel(IntEnum):
 @dataclass
 class QnnExecuTorchBackendOptions:
     backend_type: QnnExecuTorchBackendType
-    htp_options: QnnExecuTorchHtpBackendOptions
+    htp_options: Optional[QnnExecuTorchHtpBackendOptions] = None
+    gpu_options: Optional[QnnExecuTorchGpuBackendOptions] = None
 
 
 @unique
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 8822db5f7c3..1648857049e 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -30,6 +30,7 @@
     generate_context_binary,
     ModuleQConfig,
     prepare_pt2e,
+    QnnExecuTorchBackendType,
     QuantDtype,
     TestQNN,
     validate_context_binary,
@@ -47,6 +48,7 @@
     capture_program,
     dump_context_from_pte,
     from_context_binary,
+    generate_gpu_compiler_spec,
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
     is_qnn_sdk_version_less_than,
@@ -91,9 +93,16 @@
 class TestQNNFloatingPointOperator(TestQNN):
     # TODO: refactor to support different backends
     def setUp(self):
+        match self.get_backend_type():
+            case QnnExecuTorchBackendType.kHtpBackend:
+                backend_options = generate_htp_compiler_spec(use_fp16=True)
+            case QnnExecuTorchBackendType.kGpuBackend:
+                backend_options = generate_gpu_compiler_spec()
+            case _:
+                raise ValueError("Backend is not implemented yet")
+
         TestQNN.atol = 1e-1
         TestQNN.rtol = 1e-1
-        backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
             soc_model=self.chipset_table[TestQNN.model],
             backend_options=backend_options,
@@ -8646,6 +8655,13 @@ def setup_environment():
         default="",
         type=str,
     )
+    parser.add_argument(
+        "--backend",
+        help="Backend to be deployed ('htp'/'gpu' are currently supported).",
+        choices=["htp", "gpu"],
+        default="htp",
+        type=str,
+    )
     parser.add_argument(
         "--llama_artifacts",
         help="A folder that contains: weight, tokenizer, and params.",
@@ -8676,6 +8692,7 @@ def setup_environment():
     TestQNN.llama_artifacts = args.llama_artifacts
     TestQNN.op_package_dir = args.op_package_dir
     TestQNN.target = args.target
+    TestQNN.backend = args.backend
     return sys.argv[:1] + ns_args
 
 
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index e846379962d..f7b4f25f7fc 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -17,7 +17,10 @@
 from executorch.backends.qualcomm.builders.node_visitor import dq_ops
 from executorch.backends.qualcomm.qnn_preprocess import QnnBackend
 from executorch.backends.qualcomm.quantizer.quantizer import ModuleQConfig, QuantDtype
-from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
+from executorch.backends.qualcomm.serialization.qc_schema import (
+    QcomChipset,
+    QnnExecuTorchBackendType,
+)
 from executorch.backends.qualcomm.utils.constants import (
     QCOM_DTYPE,
     QCOM_PASS_ACTIVATE_KEY,
@@ -165,6 +168,7 @@ class TestQNN(unittest.TestCase):
     op_package_dir: str = ""
     target: str = ""
     model_name: str = ""
+    backend: str = ""
     online_prepare: bool = False
     use_8a8w: str = "8a8w"
     use_16a16w: str = "16a16w"
@@ -178,8 +182,6 @@ class TestQNN(unittest.TestCase):
     dump_intermediate_outputs: bool = False
     inference_speed: float = 0.0
     inference_speed_output_path = "outputs/inference_speed.txt"
-    model_name: str = ""
-    oss_repo: str = ""
 
     def _assert_outputs_equal(self, model_output, ref_output):
         self.assertTrue(len(ref_output) == len(model_output))
@@ -216,6 +218,9 @@ def _save_model_and_expected_output(
 
         return ref_outputs, pte_fname
 
+    def get_backend_type(self):  # "htp"/"gpu" -> kHtpBackend/kGpuBackend
+        return getattr(QnnExecuTorchBackendType, f"k{self.backend.title()}Backend")
+
     def required_envs(self, conditions=None) -> bool:
         conditions = [] if conditions is None else conditions
         return all(
@@ -416,6 +421,7 @@ def validate_intermediate_tensor():
                     dump_intermediate_outputs=(
                         True if expected_intermediate_events != -1 else False
                     ),
+                    backend=self.get_backend_type(),
                     expected_input_shape=(
                         (tensor.shape for tensor in processed_inputs)
                         if check_io_shape
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
index 3922fc217a1..1e952696cb9 100644
--- a/backends/qualcomm/utils/utils.py
+++ b/backends/qualcomm/utils/utils.py
@@ -34,6 +34,8 @@
     QcomChipset,
     QnnExecuTorchBackendOptions,
     QnnExecuTorchBackendType,
+    QnnExecuTorchGpuBackendOptions,
+    QnnExecuTorchGpuPrecision,
     QnnExecuTorchHtpBackendOptions,
     QnnExecuTorchHtpPerformanceMode,
     QnnExecuTorchHtpPrecision,
@@ -934,6 +936,47 @@ def draw_graph(title, path, graph_module: torch.fx.GraphModule):
         f.write(graph.get_dot_graph().create_svg())
 
 
+def generate_gpu_compiler_spec(
+    precision: QnnExecuTorchGpuPrecision = QnnExecuTorchGpuPrecision.kGpuPrecisionUserProvided,
+    use_memory_optimizations: bool = True,
+    use_node_optimizations: bool = True,
+    use_queue_recording: bool = True,
+    use_weight_sharing: bool = False,
+) -> QnnExecuTorchBackendOptions:
+    """
+    Helper function generating backend options for QNN GPU
+
+    Args:
+        precision:
+            kGpuPrecisionFp32 - Sets the precision mode to floating point 32-bit (FP32).
+            kGpuPrecisionFp16 - Sets the precision mode to floating point 16-bit (FP16).
+            kGpuPrecisionHybrid - Sets the precision mode to FP16 for storage and FP32 for calculations.
+            kGpuPrecisionUserProvided - Uses the tensor data type provided by the user.
+        use_memory_optimizations: If true, backend will share NATIVE tensor memory
+            based upon analysis of the network topology.
+        use_node_optimizations: If true, backend will fuse compatible operations into
+            one operation to improve performance.
+        use_queue_recording: If true, backend will use queue recording to improve performance.
+        use_weight_sharing: Used with multiple_graphs, where model size will be
+            reduced when operations have the same weights across multiple graphs.
+
+    Returns:
+        QnnExecuTorchBackendOptions: backend options wrapping the QNN GPU options.
+    """
+    # TODO: enable performance hint mechanism in runtime and expose it as an option
+    gpu_options = QnnExecuTorchGpuBackendOptions()
+    gpu_options.precision = precision
+    gpu_options.use_memory_optimizations = use_memory_optimizations
+    gpu_options.use_node_optimizations = use_node_optimizations
+    gpu_options.use_queue_recording = use_queue_recording
+    gpu_options.use_weight_sharing = use_weight_sharing
+
+    return QnnExecuTorchBackendOptions(
+        backend_type=QnnExecuTorchBackendType.kGpuBackend,
+        gpu_options=gpu_options,
+    )
+
+
 def generate_htp_compiler_spec(
     use_fp16: bool,
     use_dlbc: bool = False,
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index 920bad37ac4..ca6f63a87e3 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -31,6 +31,7 @@
 )
 from executorch.backends.qualcomm.serialization.qc_schema import (
     QcomChipset,
+    QnnExecuTorchBackendType,
     QnnExecuTorchOpPackageOptions,
 )
 from executorch.backends.qualcomm.utils.utils import (
@@ -83,6 +84,7 @@ def __init__(
         dump_intermediate_outputs=False,
         runner="examples/qualcomm/executor_runner/qnn_executor_runner",
         target="aarch64-android",
+        backend=QnnExecuTorchBackendType.kHtpBackend,
         expected_input_shape=None,
         expected_output_shape=None,
     ):
@@ -103,6 +105,7 @@ def __init__(
         self.shared_buffer = shared_buffer
         self.runner = runner
         self.target = target
+        self.backend = backend
         self.expected_input_shape = expected_input_shape
         self.expected_output_shape = expected_output_shape
         self.extra_cmds = ""
@@ -130,9 +133,9 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True):
             self._adb(["shell", f"rm -rf {self.workspace}"])
             self._adb(["shell", f"mkdir -p {self.workspace}"])
 
-            # necessary artifacts
-            artifacts = [
-                *self.pte_path,
+        # necessary artifacts
+        artifacts = {
+            QnnExecuTorchBackendType.kHtpBackend: [
                 f"{self.qnn_sdk}/lib/{self.target}/libQnnHtp.so",
                 (
                     f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/"
@@ -143,11 +146,21 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True):
                     f"libQnnHtpV{self.htp_arch}Stub.so"
                 ),
                 f"{self.qnn_sdk}/lib/{self.target}/libQnnHtpPrepare.so",
+            ],
+            QnnExecuTorchBackendType.kGpuBackend: [
+                f"{self.qnn_sdk}/lib/{self.target}/libQnnGpu.so",
+            ],
+        }[self.backend]
+
+        artifacts.extend(
+            [
+                *self.pte_path,
                 f"{self.qnn_sdk}/lib/{self.target}/libQnnSystem.so",
                 f"{self.build_path}/{self.runner}",
                 f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so",
                 f"{self.qnn_sdk}/lib/{self.target}/libQnnModelDlc.so",
             ]
+        )
         with tempfile.TemporaryDirectory() as tmp_dir:
             input_list_file, input_files = generate_inputs(
                 tmp_dir, self.input_list_filename, inputs