Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,16 @@ if (onnxruntime_USE_TENSORRT)
)
endif()

# After the Python bindings are built, copy the NV TensorRT RTX EP library and the
# shared provider bridge next to the bindings (onnxruntime/capi/) so the Python
# package can dlopen them at runtime.
if (onnxruntime_USE_NV)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:onnxruntime_providers_nv_tensorrt_rtx>
$<TARGET_FILE:onnxruntime_providers_shared>
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
)
endif()

if (onnxruntime_USE_MIGRAPHX)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/session/provider_bridge_ort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1994,6 +1994,10 @@ std::shared_ptr<IExecutionProviderFactory> TensorrtProviderFactoryCreator::Creat
return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options);
}

// Creates an NV TensorRT RTX execution-provider factory for the given device id,
// forwarding through the lazily loaded provider shared library (s_library_nv).
std::shared_ptr<IExecutionProviderFactory> NvProviderFactoryCreator::Create(int device_id) {
  return s_library_nv.Get().CreateExecutionProviderFactory(device_id);
}

// Creates an NV TensorRT RTX execution-provider factory from explicit provider
// options, forwarding through the lazily loaded provider shared library.
// NOTE(review): `provider_options` is a raw pointer — presumably the factory copies
// what it needs immediately; confirm the options object need not outlive the call.
std::shared_ptr<IExecutionProviderFactory> NvProviderFactoryCreator::Create(const OrtNvTensorRtRtxProviderOptions* provider_options) {
  return s_library_nv.Get().CreateExecutionProviderFactory(provider_options);
}
Expand Down
26 changes: 26 additions & 0 deletions onnxruntime/python/onnxruntime_inference_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,23 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi
self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
else:
self._fallback_providers = ["CPUExecutionProvider"]
# NV TensorRT RTX falls back to CUDA only when BOTH the NV EP and the CUDA EP were
# explicitly requested; otherwise it falls back to CPU alone.
if "NvTensorRTRTXExecutionProvider" in available_providers:
    if (
        providers
        and any(
            provider == "CUDAExecutionProvider"
            or (isinstance(provider, tuple) and provider[0] == "CUDAExecutionProvider")
            for provider in providers
        )
        and any(
            provider == "NvTensorRTRTXExecutionProvider"
            # Fix: the tuple form previously compared against "NvExecutionProvider",
            # which is not a registered EP name, so ("NvTensorRTRTXExecutionProvider",
            # {...}) entries never enabled the CUDA fallback.
            or (isinstance(provider, tuple) and provider[0] == "NvTensorRTRTXExecutionProvider")
            for provider in providers
        )
    ):
        self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    else:
        self._fallback_providers = ["CPUExecutionProvider"]
# MIGraphX can fall back to ROCM if it's explicitly assigned. All others fall back to CPU.
elif "MIGraphXExecutionProvider" in available_providers:
if providers and any(
Expand Down Expand Up @@ -582,6 +599,15 @@ def _register_ep_custom_ops(self, session_options, providers, provider_options,
):
C.register_tensorrt_plugins_as_custom_ops(session_options, providers[i][1])

# Register NV TensorRT RTX plugins as custom ops for either spelling of the
# provider entry: the bare string, or a (name, options-dict) tuple.
if providers[i] in available_providers and providers[i] == "NvTensorRTRTXExecutionProvider":
    C.register_nv_tensorrt_rtx_plugins_as_custom_ops(session_options, provider_options[i])
elif (
    isinstance(providers[i], tuple)
    and providers[i][0] in available_providers
    # Fix: was "NvTensorrtRTXExecutionProvider" (wrong casing), so the tuple
    # form was silently ignored and its plugins were never registered.
    and providers[i][0] == "NvTensorRTRTXExecutionProvider"
):
    C.register_nv_tensorrt_rtx_plugins_as_custom_ops(session_options, providers[i][1])


class IOBinding:
"""
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
#ifdef USE_TENSORRT
onnxruntime::TensorrtProviderFactoryCreator::Create(0),
#endif
#ifdef USE_NV
onnxruntime::NvProviderFactoryCreator::Create(0),
#endif
#ifdef USE_MIGRAPHX
onnxruntime::MIGraphXProviderFactoryCreator::Create(0),
#endif
Expand Down
132 changes: 132 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "core/platform/env.h"
#include "core/providers/get_execution_providers.h"
#include "core/providers/tensorrt/tensorrt_provider_options.h"
#include "core/providers/nv_tensorrt_rtx/nv_provider_options_internal.h"
#include "core/session/IOBinding.h"
#include "core/session/abi_session_options_impl.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
Expand Down Expand Up @@ -507,6 +508,38 @@ void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOpti
}
#endif

#ifdef USE_NV
// Registers the NV TensorRT RTX plugin custom-op domains on the given session
// options. Reads the optional "extra_plugin_lib_paths" provider option, asks the
// provider bridge for the plugin domain list, and appends each domain that is not
// already present (duplicates are logged and skipped).
// Throws if the NV TensorRT RTX provider library cannot be loaded.
void RegisterNvTensorRTRtxPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) {
  auto* provider_info = TryGetProviderInfo_Nv();
  if (provider_info == nullptr) {
    ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported.");
  }

  // Forward extra plugin library search paths to the provider, if supplied.
  std::string extra_plugin_lib_paths;
  const auto paths_it = options.find("extra_plugin_lib_paths");
  if (paths_it != options.end()) {
    extra_plugin_lib_paths = paths_it->second;
  }

  std::vector<OrtCustomOpDomain*> custom_op_domains;
  provider_info->GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths);

  for (OrtCustomOpDomain* domain : custom_op_domains) {
    bool already_registered = false;
    for (OrtCustomOpDomain* existing : so.custom_op_domains_) {
      if (domain->domain_ == existing->domain_) {
        already_registered = true;
        break;
      }
    }
    if (already_registered) {
      LOGS_DEFAULT(WARNING) << "The custom op domain name " << domain->domain_ << " is already in session option.";
    } else {
      so.custom_op_domains_.push_back(domain);
    }
  }
}
#endif

std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
const SessionOptions& session_options,
const std::string& type,
Expand Down Expand Up @@ -851,6 +884,99 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
<< "https://onnxruntime.ai/docs/execution-providers/"
<< "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met.";
#endif

} else if (type == kNvTensorRTRTXExecutionProvider) {
#ifdef USE_NV
    // _ld_preload.py sets ORT_NV_TENSORRT_RTX_UNAVAILABLE when the EP's native
    // libraries fail to preload; skip creation then and fall through to the warning.
    if (Env::Default().GetEnvironmentVar("ORT_NV_TENSORRT_RTX_UNAVAILABLE").empty()) {
      auto it = provider_options_map.find(type);
      if (it != provider_options_map.end()) {
        OrtNvTensorRtRtxProviderOptions params;
        // Iterate by const reference: `params` stores raw `const char*` views of the
        // profile_*_shapes option strings, so they must point into the long-lived map
        // entries. The previous by-value loop variable plus per-branch local
        // std::string temporaries left those pointers dangling by the time the
        // factory consumed `params` after the loop.
        for (const auto& option : it->second) {
          if (option.first == "device_id") {
            if (!option.second.empty()) {
              params.device_id = std::stoi(option.second);
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'device_id' should be a number i.e. '0'.\n");
            }
          } else if (option.first == "user_compute_stream") {
            if (!option.second.empty()) {
              // Base 0: accepts decimal or 0x-prefixed pointer values.
              auto stream = std::stoull(option.second, nullptr, 0);
              params.user_compute_stream = reinterpret_cast<void*>(stream);
              params.has_user_compute_stream = true;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'user_compute_stream' should be a string to define the compute stream for the inference to run on.\n");
            }
          } else if (option.first == "dump_subgraphs") {
            if (option.second == "True" || option.second == "true") {
              params.dump_subgraphs = true;
            } else if (option.second == "False" || option.second == "false") {
              params.dump_subgraphs = false;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'dump_subgraphs' should be 'True' or 'False'. Default value is 'False'.\n");
            }
          } else if (option.first == "max_workspace_size") {
            if (!option.second.empty()) {
              params.max_workspace_size = std::stoull(option.second);
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'max_workspace_size' should be a number in byte i.e. '1073741824'.\n");
            }
          } else if (option.first == "detailed_build_log") {
            if (option.second == "True" || option.second == "true") {
              params.detailed_build_log = true;
            } else if (option.second == "False" || option.second == "false") {
              params.detailed_build_log = false;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'detailed_build_log' should be 'True' or 'False'. Default value is 'False'.\n");
            }
          } else if (option.first == "profile_min_shapes") {
            if (!option.second.empty()) {
              // Points at the map-owned string, which outlives factory creation.
              params.profile_min_shapes = option.second.c_str();
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_min_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n");
            }
          } else if (option.first == "profile_max_shapes") {
            if (!option.second.empty()) {
              params.profile_max_shapes = option.second.c_str();
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_max_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n");
            }
          } else if (option.first == "profile_opt_shapes") {
            if (!option.second.empty()) {
              params.profile_opt_shapes = option.second.c_str();
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_opt_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n");
            }
          } else if (option.first == "cuda_graph_enable") {
            if (option.second == "True" || option.second == "true") {
              params.cuda_graph_enable = true;
            } else if (option.second == "False" || option.second == "false") {
              params.cuda_graph_enable = false;
            } else {
              ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'cuda_graph_enable' should be 'True' or 'False'. Default value is 'False'.\n");
            }
          } else {
            ORT_THROW("Invalid NV_TensorRT_RTX EP option: ", option.first);
          }
        }
        if (std::shared_ptr<IExecutionProviderFactory> nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(&params)) {
          return nv_tensorrt_rtx_provider_factory->CreateProvider();
        }
      } else {
        // No explicit options: create with the default/selected CUDA device id.
        if (std::shared_ptr<IExecutionProviderFactory> nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(cuda_device_id)) {
          return nv_tensorrt_rtx_provider_factory->CreateProvider();
        }
      }
    }
    // NOTE(review): this links to the TensorRT EP requirements page; confirm whether a
    // dedicated NV TensorRT RTX page exists and point there instead.
    LOGS_DEFAULT(WARNING) << "Failed to create "
                          << type
                          << ". Please reference "
                          << "https://onnxruntime.ai/docs/execution-providers/"
                          << "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met.";
#endif
} else if (type == kMIGraphXExecutionProvider) {
#ifdef USE_MIGRAPHX
std::string calibration_table;
Expand Down Expand Up @@ -1532,6 +1658,12 @@ void addGlobalMethods(py::module& m) {
"Register TensorRT plugins as custom ops.");
#endif

#ifdef USE_NV
m.def(
"register_nv_tensorrt_rtx_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterNvTensorRTRtxPluginsAsCustomOps(so, options); },
"Register NV TensorRT RTX plugins as custom ops.");
#endif

#ifdef ENABLE_ATEN
m.def("register_aten_op_executor",
[](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void {
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct OrtStatus {
#include "core/providers/providers.h"
#include "core/providers/provider_factory_creators.h"
#include "core/providers/tensorrt/tensorrt_provider_options.h"
#include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"

#if defined(USE_CUDA) || defined(USE_ROCM)
#define BACKEND_PROC "GPU"
Expand Down Expand Up @@ -122,6 +123,9 @@ struct OrtStatus {
#ifdef USE_TENSORRT
#include "core/providers/tensorrt/tensorrt_provider_factory.h"
#endif
#ifdef USE_NV
#include "core/providers/nv_tensorrt_rtx/nv_provider_factory.h"
#endif
#ifdef USE_MIGRAPHX
#include "core/providers/migraphx/migraphx_provider_factory.h"
#endif
Expand Down Expand Up @@ -173,6 +177,13 @@ ProviderInfo_TensorRT& GetProviderInfo_TensorRT();
} // namespace onnxruntime
#endif

#ifdef USE_NV
namespace onnxruntime {
// Accessors for the NV TensorRT RTX provider-bridge info object.
// NOTE(review): by analogy with the TensorRT/CANN counterparts declared in this
// header, Try* presumably returns nullptr when the provider shared library cannot
// be loaded, while the reference overload is expected to throw — confirm.
ProviderInfo_Nv* TryGetProviderInfo_Nv();
ProviderInfo_Nv& GetProviderInfo_Nv();
} // namespace onnxruntime
#endif

#ifdef USE_CANN
namespace onnxruntime {
ProviderInfo_CANN* TryGetProviderInfo_CANN();
Expand Down
17 changes: 17 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ def _rewrite_ld_preload_tensorrt(self, to_preload):
f.write(" import os\n")
f.write(' os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')

def _rewrite_ld_preload_nv_tensorrt_rtx(self, to_preload):
with open("onnxruntime/capi/_ld_preload.py", "a", encoding="ascii") as f:
if len(to_preload) > 0:
f.write("from ctypes import CDLL, RTLD_GLOBAL\n")
f.write("try:\n")
for library in to_preload:
f.write(' _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split(".")[0], library))
f.write("except OSError:\n")
f.write(" import os\n")
f.write(' os.environ["ORT_NV_TENSORRT_RTX_UNAVAILABLE"] = "1"\n')

def run(self):
if is_manylinux:
source = "onnxruntime/capi/onnxruntime_pybind11_state.so"
Expand All @@ -201,6 +212,7 @@ def run(self):
to_preload = []
to_preload_cuda = []
to_preload_tensorrt = []
to_preload_nv_tensorrt_rtx = []
to_preload_cann = []

cuda_dependencies = [
Expand Down Expand Up @@ -268,6 +280,7 @@ def run(self):
self._rewrite_ld_preload(to_preload)
self._rewrite_ld_preload_cuda(to_preload_cuda)
self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
# Fix: was calling _rewrite_ld_preload_tensorrt, which writes the
# ORT_TENSORRT_UNAVAILABLE guard — the NV-specific writer (and its
# ORT_NV_TENSORRT_RTX_UNAVAILABLE env var) was never used.
self._rewrite_ld_preload_nv_tensorrt_rtx(to_preload_nv_tensorrt_rtx)
self._rewrite_ld_preload(to_preload_cann)

else:
Expand Down Expand Up @@ -303,6 +316,7 @@ def finalize_options(self):

providers_cuda_or_rocm = "onnxruntime_providers_" + ("rocm" if is_rocm else "cuda")
providers_tensorrt_or_migraphx = "onnxruntime_providers_" + ("migraphx" if is_migraphx else "tensorrt")
providers_nv_tensorrt_rtx = "onnxruntime_providers_nv_tensorrt_rtx"
providers_openvino = "onnxruntime_providers_openvino"
providers_cann = "onnxruntime_providers_cann"
providers_qnn = "onnxruntime_providers_qnn"
Expand All @@ -316,6 +330,7 @@ def finalize_options(self):
elif platform.system() == "Windows":
providers_cuda_or_rocm = providers_cuda_or_rocm + ".dll"
providers_tensorrt_or_migraphx = providers_tensorrt_or_migraphx + ".dll"
providers_nv_tensorrt_rtx = providers_nv_tensorrt_rtx + ".dll"
providers_openvino = providers_openvino + ".dll"
providers_cann = providers_cann + ".dll"
providers_qnn = providers_qnn + ".dll"
Expand Down Expand Up @@ -384,13 +399,15 @@ def finalize_options(self):
"libiomp5md.dll",
providers_cuda_or_rocm,
providers_tensorrt_or_migraphx,
providers_nv_tensorrt_rtx,
providers_cann,
"onnxruntime.dll",
]
# DNNL, TensorRT, NV TensorRT RTX, OpenVINO, and QNN EPs are built as shared libs
libs.extend(["onnxruntime_providers_shared.dll"])
libs.extend(["onnxruntime_providers_dnnl.dll"])
libs.extend(["onnxruntime_providers_tensorrt.dll"])
libs.extend(["onnxruntime_providers_nv_tensorrt_rtx.dll"])
libs.extend(["onnxruntime_providers_openvino.dll"])
libs.extend(["onnxruntime_providers_cuda.dll"])
libs.extend(["onnxruntime_providers_vitisai.dll"])
Expand Down