Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,16 @@ if (onnxruntime_USE_TENSORRT)
)
endif()

# After the Python bindings are built, copy the NV TensorRT RTX EP library and the
# shared provider bridge next to the bindings (onnxruntime/capi/) so the Python
# package can dlopen them at runtime.
if (onnxruntime_USE_NV)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:onnxruntime_providers_nv_tensorrt_rtx>
$<TARGET_FILE:onnxruntime_providers_shared>
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
)
endif()

if (onnxruntime_USE_MIGRAPHX)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/session/provider_bridge_ort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1994,6 +1994,10 @@ std::shared_ptr<IExecutionProviderFactory> TensorrtProviderFactoryCreator::Creat
return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options);
}

// Creates an NV TensorRT RTX execution-provider factory for the given device id,
// forwarding through the lazily loaded provider shared library (s_library_nv).
std::shared_ptr<IExecutionProviderFactory> NvProviderFactoryCreator::Create(int device_id) {
  return s_library_nv.Get().CreateExecutionProviderFactory(device_id);
}

// Creates an NV TensorRT RTX execution-provider factory from explicit provider
// options, forwarding through the lazily loaded provider shared library.
// NOTE(review): `provider_options` is a raw pointer — presumably the factory copies
// what it needs immediately; confirm the options object need not outlive the call.
std::shared_ptr<IExecutionProviderFactory> NvProviderFactoryCreator::Create(const OrtNvTensorRtRtxProviderOptions* provider_options) {
  return s_library_nv.Get().CreateExecutionProviderFactory(provider_options);
}
Expand Down
26 changes: 26 additions & 0 deletions onnxruntime/python/onnxruntime_inference_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,23 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi
self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
else:
self._fallback_providers = ["CPUExecutionProvider"]
# NV TensorRT RTX falls back to CUDA only when BOTH the NV EP and the CUDA EP were
# explicitly requested; otherwise it falls back to CPU alone.
if "NvTensorRTRTXExecutionProvider" in available_providers:
    if (
        providers
        and any(
            provider == "CUDAExecutionProvider"
            or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
            for provider in providers
        )
        and any(
            provider == "NvTensorRTRTXExecutionProvider"
            # Fix: the tuple form previously compared against "NvExecutionProvider",
            # which is not a registered EP name, so ("NvTensorRTRTXExecutionProvider",
            # {...}) entries never enabled the CUDA fallback.
            or (isinstance(provider, tuple) and provider[0] == "NvTensorRTRTXExecutionProvider")
            for provider in providers
        )
    ):
        self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    else:
        self._fallback_providers = ["CPUExecutionProvider"]
# MIGraphX can fall back to ROCM if it's explicitly assigned. All others fall back to CPU.
elif "MIGraphXExecutionProvider" in available_providers:
if providers and any(
Expand Down Expand Up @@ -582,6 +599,15 @@ def _register_ep_custom_ops(self, session_options, providers, provider_options,
):
C.register_tensorrt_plugins_as_custom_ops(session_options, providers[i][1])

# Register NV TensorRT RTX plugins as custom ops for either spelling of the
# provider entry: the bare string, or a (name, options-dict) tuple.
if providers[i] in available_providers and providers[i] == "NvTensorRTRTXExecutionProvider":
    C.register_nv_tensorrt_rtx_plugins_as_custom_ops(session_options, provider_options[i])
elif (
    isinstance(providers[i], tuple)
    and providers[i][0] in available_providers
    # Fix: was "NvTensorrtRTXExecutionProvider" (wrong casing), so the tuple
    # form was silently ignored and its plugins were never registered.
    and providers[i][0] == "NvTensorRTRTXExecutionProvider"
):
    C.register_nv_tensorrt_rtx_plugins_as_custom_ops(session_options, providers[i][1])


class IOBinding:
"""
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
#ifdef USE_TENSORRT
onnxruntime::TensorrtProviderFactoryCreator::Create(0),
#endif
#ifdef USE_NV
onnxruntime::NvProviderFactoryCreator::Create(0),
#endif
#ifdef USE_MIGRAPHX
onnxruntime::MIGraphXProviderFactoryCreator::Create(0),
#endif
Expand Down
132 changes: 132 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "core/platform/env.h"
#include "core/providers/get_execution_providers.h"
#include "core/providers/tensorrt/tensorrt_provider_options.h"
#include "core/providers/nv_tensorrt_rtx/nv_provider_options_internal.h"
#include "core/session/IOBinding.h"
#include "core/session/abi_session_options_impl.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
Expand Down Expand Up @@ -507,6 +508,38 @@ void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOpti
}
#endif

#ifdef USE_NV
// Registers the NV TensorRT RTX plugin custom-op domains on the given session
// options. Reads the optional "extra_plugin_lib_paths" provider option, asks the
// provider bridge for the plugin domain list, and appends each domain that is not
// already present (duplicates are logged and skipped).
// Throws if the NV TensorRT RTX provider library cannot be loaded.
void RegisterNvTensorRTRtxPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) {
  auto* provider_info = TryGetProviderInfo_Nv();
  if (provider_info == nullptr) {
    ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported.");
  }

  // Forward extra plugin library search paths to the provider, if supplied.
  std::string extra_plugin_lib_paths;
  const auto paths_it = options.find("extra_plugin_lib_paths");
  if (paths_it != options.end()) {
    extra_plugin_lib_paths = paths_it->second;
  }

  std::vector<OrtCustomOpDomain*> custom_op_domains;
  provider_info->GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths);

  for (OrtCustomOpDomain* domain : custom_op_domains) {
    bool already_registered = false;
    for (OrtCustomOpDomain* existing : so.custom_op_domains_) {
      if (domain->domain_ == existing->domain_) {
        already_registered = true;
        break;
      }
    }
    if (already_registered) {
      LOGS_DEFAULT(WARNING) << "The custom op domain name " << domain->domain_ << " is already in session option.";
    } else {
      so.custom_op_domains_.push_back(domain);
    }
  }
}
#endif

std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
const SessionOptions& session_options,
const std::string& type,
Expand Down Expand Up @@ -851,6 +884,99 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
<< "https://onnxruntime.ai/docs/execution-providers/"
<< "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met.";
#endif

} else if (type == kNvTensorRTRTXExecutionProvider) {
#ifdef USE_NV
    // _ld_preload.py sets ORT_NV_TENSORRT_RTX_UNAVAILABLE when the EP's native
    // libraries fail to preload; skip creation then and fall through to the warning.
    if (Env::Default().GetEnvironmentVar("ORT_NV_TENSORRT_RTX_UNAVAILABLE").empty()) {
      auto it = provider_options_map.find(type);
      if (it != provider_options_map.end()) {
        OrtNvTensorRtRtxProviderOptions params;
        // Iterate by const reference: `params` stores raw `const char*` views of the
        // profile_*_shapes option strings, so they must point into the long-lived map
        // entries. The previous by-value loop variable plus per-branch local
        // std::string temporaries left those pointers dangling by the time the
        // factory consumed `params` after the loop.
        for (const auto& option : it->second) {
          if (option.first == "device_id") {
            if (!option.second.empty()) {
              params.device_id = std::stoi(option.second);
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'device_id' should be a number i.e. '0'.\n");
            }
          } else if (option.first == "user_compute_stream") {
            if (!option.second.empty()) {
              // Base 0: accepts decimal or 0x-prefixed pointer values.
              auto stream = std::stoull(option.second, nullptr, 0);
              params.user_compute_stream = reinterpret_cast<void*>(stream);
              params.has_user_compute_stream = true;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'user_compute_stream' should be a string to define the compute stream for the inference to run on.\n");
            }
          } else if (option.first == "dump_subgraphs") {
            if (option.second == "True" || option.second == "true") {
              params.dump_subgraphs = true;
            } else if (option.second == "False" || option.second == "false") {
              params.dump_subgraphs = false;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'dump_subgraphs' should be 'True' or 'False'. Default value is 'False'.\n");
            }
          } else if (option.first == "max_workspace_size") {
            if (!option.second.empty()) {
              params.max_workspace_size = std::stoull(option.second);
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
            }
          } else if (option.first == "detailed_build_log") {
            if (option.second == "True" || option.second == "true") {
              params.detailed_build_log = true;
            } else if (option.second == "False" || option.second == "false") {
              params.detailed_build_log = false;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'detailed_build_log' should be 'True' or 'False'. Default value is 'False'.\n");
            }
          } else if (option.first == "profile_min_shapes") {
            if (!option.second.empty()) {
              // Points at the map-owned string, which outlives factory creation.
              params.profile_min_shapes = option.second.c_str();
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_min_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n");
            }
          } else if (option.first == "profile_max_shapes") {
            if (!option.second.empty()) {
              params.profile_max_shapes = option.second.c_str();
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_max_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n");
            }
          } else if (option.first == "profile_opt_shapes") {
            if (!option.second.empty()) {
              params.profile_opt_shapes = option.second.c_str();
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_opt_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n");
            }
          } else if (option.first == "cuda_graph_enable") {
            if (option.second == "True" || option.second == "true") {
              params.cuda_graph_enable = true;
            } else if (option.second == "False" || option.second == "false") {
              params.cuda_graph_enable = false;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'cuda_graph_enable' should be 'True' or 'False'. Default value is 'False'.\n");
            }
          } else {
            ORT_THROW("Invalid NV_TensorRT_RTX EP option: ", option.first);
          }
        }
        if (std::shared_ptr<IExecutionProviderFactory> nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(&params)) {
          return nv_tensorrt_rtx_provider_factory->CreateProvider();
        }
      } else {
        // No explicit options: create with the default/selected CUDA device id.
        if (std::shared_ptr<IExecutionProviderFactory> nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(cuda_device_id)) {
          return nv_tensorrt_rtx_provider_factory->CreateProvider();
        }
      }
    }
    // NOTE(review): this links to the TensorRT EP requirements page; confirm whether a
    // dedicated NV TensorRT RTX page exists and point there instead.
    LOGS_DEFAULT(WARNING) << "Failed to create "
                          << type
                          << ". Please reference "
                          << "https://onnxruntime.ai/docs/execution-providers/"
                          << "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met.";
#endif
} else if (type == kMIGraphXExecutionProvider) {
#ifdef USE_MIGRAPHX
std::string calibration_table;
Expand Down Expand Up @@ -1532,6 +1658,12 @@ void addGlobalMethods(py::module& m) {
"Register TensorRT plugins as custom ops.");
#endif

#ifdef USE_NV
m.def(
"register_nv_tensorrt_rtx_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterNvTensorRTRtxPluginsAsCustomOps(so, options); },
"Register NV TensorRT RTX plugins as custom ops.");
#endif

#ifdef ENABLE_ATEN
m.def("register_aten_op_executor",
[](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void {
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct OrtStatus {
#include "core/providers/providers.h"
#include "core/providers/provider_factory_creators.h"
#include "core/providers/tensorrt/tensorrt_provider_options.h"
#include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"

#if defined(USE_CUDA) || defined(USE_ROCM)
#define BACKEND_PROC "GPU"
Expand Down Expand Up @@ -122,6 +123,9 @@ struct OrtStatus {
#ifdef USE_TENSORRT
#include "core/providers/tensorrt/tensorrt_provider_factory.h"
#endif
#ifdef USE_NV
#include "core/providers/nv_tensorrt_rtx/nv_provider_factory.h"
#endif
#ifdef USE_MIGRAPHX
#include "core/providers/migraphx/migraphx_provider_factory.h"
#endif
Expand Down Expand Up @@ -173,6 +177,13 @@ ProviderInfo_TensorRT& GetProviderInfo_TensorRT();
} // namespace onnxruntime
#endif

#ifdef USE_NV
namespace onnxruntime {
// Accessors for the NV TensorRT RTX provider-bridge info object.
// NOTE(review): by analogy with the TensorRT/CANN counterparts declared in this
// header, Try* presumably returns nullptr when the provider shared library cannot
// be loaded, while the reference overload is expected to throw — confirm.
ProviderInfo_Nv* TryGetProviderInfo_Nv();
ProviderInfo_Nv& GetProviderInfo_Nv();
} // namespace onnxruntime
#endif

#ifdef USE_CANN
namespace onnxruntime {
ProviderInfo_CANN* TryGetProviderInfo_CANN();
Expand Down
17 changes: 17 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ def _rewrite_ld_preload_tensorrt(self, to_preload):
f.write(" import os\n")
f.write(' os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')

def _rewrite_ld_preload_nv_tensorrt_rtx(self, to_preload):
with open("onnxruntime/capi/_ld_preload.py", "a", encoding="ascii") as f:
if len(to_preload) > 0:
f.write("from ctypes import CDLL, RTLD_GLOBAL\n")
f.write("try:\n")
for library in to_preload:
f.write(' _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split(".")[0], library))
f.write("except OSError:\n")
f.write(" import os\n")
f.write(' os.environ["ORT_NV_TENSORRT_RTX_UNAVAILABLE"] = "1"\n')

def run(self):
if is_manylinux:
source = "onnxruntime/capi/onnxruntime_pybind11_state.so"
Expand All @@ -201,6 +212,7 @@ def run(self):
to_preload = []
to_preload_cuda = []
to_preload_tensorrt = []
to_preload_nv_tensorrt_rtx = []
to_preload_cann = []

cuda_dependencies = [
Expand Down Expand Up @@ -268,6 +280,7 @@ def run(self):
self._rewrite_ld_preload(to_preload)
self._rewrite_ld_preload_cuda(to_preload_cuda)
self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
# Fix: was calling _rewrite_ld_preload_tensorrt, which writes the
# ORT_TENSORRT_UNAVAILABLE guard — the NV-specific writer (and its
# ORT_NV_TENSORRT_RTX_UNAVAILABLE env var) was never used.
self._rewrite_ld_preload_nv_tensorrt_rtx(to_preload_nv_tensorrt_rtx)
self._rewrite_ld_preload(to_preload_cann)

else:
Expand Down Expand Up @@ -303,6 +316,7 @@ def finalize_options(self):

providers_cuda_or_rocm = "onnxruntime_providers_" + ("rocm" if is_rocm else "cuda")
providers_tensorrt_or_migraphx = "onnxruntime_providers_" + ("migraphx" if is_migraphx else "tensorrt")
providers_nv_tensorrt_rtx = "onnxruntime_providers_nv_tensorrt_rtx"
providers_openvino = "onnxruntime_providers_openvino"
providers_cann = "onnxruntime_providers_cann"
providers_qnn = "onnxruntime_providers_qnn"
Expand All @@ -316,6 +330,7 @@ def finalize_options(self):
elif platform.system() == "Windows":
providers_cuda_or_rocm = providers_cuda_or_rocm + ".dll"
providers_tensorrt_or_migraphx = providers_tensorrt_or_migraphx + ".dll"
providers_nv_tensorrt_rtx = providers_nv_tensorrt_rtx + ".dll"
providers_openvino = providers_openvino + ".dll"
providers_cann = providers_cann + ".dll"
providers_qnn = providers_qnn + ".dll"
Expand Down Expand Up @@ -384,13 +399,15 @@ def finalize_options(self):
"libiomp5md.dll",
providers_cuda_or_rocm,
providers_tensorrt_or_migraphx,
providers_nv_tensorrt_rtx,
providers_cann,
"onnxruntime.dll",
]
# DNNL, TensorRT, NV TensorRT RTX, OpenVINO, and QNN EPs are built as shared libs
libs.extend(["onnxruntime_providers_shared.dll"])
libs.extend(["onnxruntime_providers_dnnl.dll"])
libs.extend(["onnxruntime_providers_tensorrt.dll"])
libs.extend(["onnxruntime_providers_nv_tensorrt_rtx.dll"])
libs.extend(["onnxruntime_providers_openvino.dll"])
libs.extend(["onnxruntime_providers_cuda.dll"])
libs.extend(["onnxruntime_providers_vitisai.dll"])
Expand Down