diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index ca65c02a40c3b..b629a89a9f070 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -914,6 +914,16 @@ if (onnxruntime_USE_TENSORRT) ) endif() +if (onnxruntime_USE_NV) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $<TARGET_FILE:onnxruntime_providers_nv_tensorrt_rtx> + $<TARGET_FILE:onnxruntime_providers_shared> + $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/capi/ + ) +endif() + if (onnxruntime_USE_MIGRAPHX) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 68ffd76257eea..7187c0a454139 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1994,6 +1994,10 @@ std::shared_ptr<IExecutionProviderFactory> TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } +std::shared_ptr<IExecutionProviderFactory> NvProviderFactoryCreator::Create(int device_id) { + return s_library_nv.Get().CreateExecutionProviderFactory(device_id); +} + std::shared_ptr<IExecutionProviderFactory> NvProviderFactoryCreator::Create(const OrtNvTensorRtRtxProviderOptions* provider_options) { return s_library_nv.Get().CreateExecutionProviderFactory(provider_options); } diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index 785eb9c485d25..ed0298a85b8e7 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -507,6 +507,23 @@ def _create_inference_session(self, providers, provider_options, disabled_optimi self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] else: self._fallback_providers = ["CPUExecutionProvider"] + if "NvTensorRTRTXExecutionProvider" in available_providers: + if ( + providers + and any( + provider == "CUDAExecutionProvider" + or (isinstance(provider, tuple) and provider[0] == 
"CUDAExecutionProvider") + for provider in providers + ) + and any( + provider == "NvTensorRTRTXExecutionProvider" + or (isinstance(provider, tuple) and provider[0] == "NvTensorRTRTXExecutionProvider") + for provider in providers + ) + ): + self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] + else: + self._fallback_providers = ["CPUExecutionProvider"] # MIGraphX can fall back to ROCM if it's explicitly assigned. All others fall back to CPU. elif "MIGraphXExecutionProvider" in available_providers: if providers and any( @@ -582,6 +599,15 @@ def _register_ep_custom_ops(self, session_options, providers, provider_options, ): C.register_tensorrt_plugins_as_custom_ops(session_options, providers[i][1]) + if providers[i] in available_providers and providers[i] == "NvTensorRTRTXExecutionProvider": + C.register_nv_tensorrt_rtx_plugins_as_custom_ops(session_options, provider_options[i]) + elif ( + isinstance(providers[i], tuple) + and providers[i][0] in available_providers + and providers[i][0] == "NvTensorRTRTXExecutionProvider" + ): + C.register_nv_tensorrt_rtx_plugins_as_custom_ops(session_options, providers[i][1]) + class IOBinding: """ diff --git a/onnxruntime/python/onnxruntime_pybind_schema.cc b/onnxruntime/python/onnxruntime_pybind_schema.cc index 958da26f4faf0..cd1d2a8da10aa 100644 --- a/onnxruntime/python/onnxruntime_pybind_schema.cc +++ b/onnxruntime/python/onnxruntime_pybind_schema.cc @@ -48,6 +48,9 @@ void addGlobalSchemaFunctions(pybind11::module& m) { #ifdef USE_TENSORRT onnxruntime::TensorrtProviderFactoryCreator::Create(0), #endif +#ifdef USE_NV + onnxruntime::NvProviderFactoryCreator::Create(0), +#endif #ifdef USE_MIGRAPHX onnxruntime::MIGraphXProviderFactoryCreator::Create(0), #endif diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index a069cfa0b4713..9f11bc91cdbf4 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ 
-27,6 +27,7 @@ #include "core/platform/env.h" #include "core/providers/get_execution_providers.h" #include "core/providers/tensorrt/tensorrt_provider_options.h" +#include "core/providers/nv_tensorrt_rtx/nv_provider_options_internal.h" #include "core/session/IOBinding.h" #include "core/session/abi_session_options_impl.h" #include "core/session/onnxruntime_session_options_config_keys.h" @@ -507,6 +508,38 @@ void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOpti } #endif +#ifdef USE_NV +void RegisterNvTensorRTRtxPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) { + if (auto* nv_tensorrt_rtx_provider_info = TryGetProviderInfo_Nv()) { + auto is_already_in_domains = [&](std::string& domain_name, std::vector<OrtCustomOpDomain*>& domains) { + for (auto ptr : domains) { + if (domain_name == ptr->domain_) { + return true; + } + } + return false; + }; + + std::string extra_plugin_lib_paths = ""; + const auto it = options.find("extra_plugin_lib_paths"); + if (it != options.end()) { + extra_plugin_lib_paths = it->second; + } + std::vector<OrtCustomOpDomain*> custom_op_domains; + nv_tensorrt_rtx_provider_info->GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths); + for (auto ptr : custom_op_domains) { + if (!is_already_in_domains(ptr->domain_, so.custom_op_domains_)) { + so.custom_op_domains_.push_back(ptr); + } else { + LOGS_DEFAULT(WARNING) << "The custom op domain name " << ptr->domain_ << " is already in session option."; + } + } + } else { + ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported."); + } +} +#endif + std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance( const SessionOptions& session_options, const std::string& type, @@ -851,6 +884,99 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance( << "https://onnxruntime.ai/docs/execution-providers/" << "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met."; 
#endif + + } else if (type == kNvTensorRTRTXExecutionProvider) { +#ifdef USE_NV + if (Env::Default().GetEnvironmentVar("ORT_NV_TENSORRT_RTX_UNAVAILABLE").empty()) { + auto it = provider_options_map.find(type); + if (it != provider_options_map.end()) { + OrtNvTensorRtRtxProviderOptions params; + for (auto option : it->second) { + if (option.first == "device_id") { + if (!option.second.empty()) { + params.device_id = std::stoi(option.second); + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'device_id' should be a number i.e. '0'.\n"); + } + } else if (option.first == "user_compute_stream") { + if (!option.second.empty()) { + auto stream = std::stoull(option.second, nullptr, 0); + params.user_compute_stream = reinterpret_cast<void*>(stream); + params.has_user_compute_stream = true; + } else { + params.has_user_compute_stream = false; + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'user_compute_stream' should be a string to define the compute stream for the inference to run on.\n"); + } + } else if (option.first == "dump_subgraphs") { + if (option.second == "True" || option.second == "true") { + params.dump_subgraphs = true; + } else if (option.second == "False" || option.second == "false") { + params.dump_subgraphs = false; + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'dump_subgraphs' should be 'True' or 'False'. Default value is 'False'.\n"); + } + } else if (option.first == "max_workspace_size") { + if (!option.second.empty()) { + params.max_workspace_size = std::stoull(option.second); + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'max_workspace_size' should be a number in byte i.e. 
'1073741824'.\n"); + } + } else if (option.first == "detailed_build_log") { + if (option.second == "True" || option.second == "true") { + params.detailed_build_log = true; + } else if (option.second == "False" || option.second == "false") { + params.detailed_build_log = false; + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'detailed_build_log' should be 'True' or 'False'. Default value is 'False'.\n"); + } + } else if (option.first == "profile_min_shapes") { + if (!option.second.empty()) { + std::string min_profile = option.second; + params.profile_min_shapes = min_profile.c_str(); + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_min_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n"); + } + } else if (option.first == "profile_max_shapes") { + if (!option.second.empty()) { + std::string max_profile = option.second; + params.profile_max_shapes = max_profile.c_str(); + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_max_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n"); + } + } else if (option.first == "profile_opt_shapes") { + if (!option.second.empty()) { + std::string opt_profile = option.second; + params.profile_opt_shapes = opt_profile.c_str(); + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'profile_opt_shapes' should be a string of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'.\n"); + } + } else if (option.first == "cuda_graph_enable") { + if (option.second == "True" || option.second == "true") { + params.cuda_graph_enable = true; + } else if (option.second == "False" || option.second == "false") { + params.cuda_graph_enable = false; + } else { + ORT_THROW("[ERROR] [NV_TensorRT_RTX] The value for the key 'cuda_graph_enable' should be 'True' or 'False'. 
Default value is 'False'.\n"); + } + } else { + ORT_THROW("Invalid NV_TensorRT_RTX EP option: ", option.first); + } + } + if (std::shared_ptr<IExecutionProviderFactory> nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(&params)) { + return nv_tensorrt_rtx_provider_factory->CreateProvider(); + } + } else { + if (std::shared_ptr<IExecutionProviderFactory> nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(cuda_device_id)) { + return nv_tensorrt_rtx_provider_factory->CreateProvider(); + } + } + } + LOGS_DEFAULT(WARNING) << "Failed to create " + << type + << ". Please reference " + << "https://onnxruntime.ai/docs/execution-providers/" + << "TensorRT-ExecutionProvider.html#requirements to ensure all dependencies are met."; +#endif } else if (type == kMIGraphXExecutionProvider) { #ifdef USE_MIGRAPHX std::string calibration_table; @@ -1532,6 +1658,12 @@ void addGlobalMethods(py::module& m) { "Register TensorRT plugins as custom ops."); #endif +#ifdef USE_NV + m.def( + "register_nv_tensorrt_rtx_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterNvTensorRTRtxPluginsAsCustomOps(so, options); }, + "Register NV TensorRT RTX plugins as custom ops."); +#endif + #ifdef ENABLE_ATEN m.def("register_aten_op_executor", [](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 8d4a882b140ac..168880517c3a5 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -29,6 +29,7 @@ struct OrtStatus { #include "core/providers/providers.h" #include "core/providers/provider_factory_creators.h" #include "core/providers/tensorrt/tensorrt_provider_options.h" +#include "core/providers/nv_tensorrt_rtx/nv_provider_options.h" #if defined(USE_CUDA) || defined(USE_ROCM) #define BACKEND_PROC "GPU" @@ -122,6 
+123,9 @@ struct OrtStatus { #ifdef USE_TENSORRT #include "core/providers/tensorrt/tensorrt_provider_factory.h" #endif +#ifdef USE_NV +#include "core/providers/nv_tensorrt_rtx/nv_provider_factory.h" +#endif #ifdef USE_MIGRAPHX #include "core/providers/migraphx/migraphx_provider_factory.h" #endif @@ -173,6 +177,13 @@ ProviderInfo_TensorRT& GetProviderInfo_TensorRT(); } // namespace onnxruntime #endif +#ifdef USE_NV +namespace onnxruntime { +ProviderInfo_Nv* TryGetProviderInfo_Nv(); +ProviderInfo_Nv& GetProviderInfo_Nv(); +} // namespace onnxruntime +#endif + #ifdef USE_CANN namespace onnxruntime { ProviderInfo_CANN* TryGetProviderInfo_CANN(); diff --git a/setup.py b/setup.py index 5fc78963eca9a..1e426ea8e060b 100644 --- a/setup.py +++ b/setup.py @@ -191,6 +191,17 @@ def _rewrite_ld_preload_tensorrt(self, to_preload): f.write(" import os\n") f.write(' os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n') + def _rewrite_ld_preload_nv_tensorrt_rtx(self, to_preload): + with open("onnxruntime/capi/_ld_preload.py", "a", encoding="ascii") as f: + if len(to_preload) > 0: + f.write("from ctypes import CDLL, RTLD_GLOBAL\n") + f.write("try:\n") + for library in to_preload: + f.write(' _{} = CDLL("{}", mode=RTLD_GLOBAL)\n'.format(library.split(".")[0], library)) + f.write("except OSError:\n") + f.write(" import os\n") + f.write(' os.environ["ORT_NV_TENSORRT_RTX_UNAVAILABLE"] = "1"\n') + def run(self): if is_manylinux: source = "onnxruntime/capi/onnxruntime_pybind11_state.so" @@ -201,6 +212,7 @@ def run(self): to_preload = [] to_preload_cuda = [] to_preload_tensorrt = [] + to_preload_nv_tensorrt_rtx = [] to_preload_cann = [] cuda_dependencies = [ @@ -268,6 +280,7 @@ def run(self): self._rewrite_ld_preload(to_preload) self._rewrite_ld_preload_cuda(to_preload_cuda) self._rewrite_ld_preload_tensorrt(to_preload_tensorrt) + self._rewrite_ld_preload_nv_tensorrt_rtx(to_preload_nv_tensorrt_rtx) self._rewrite_ld_preload(to_preload_cann) else: @@ -303,6 +316,7 @@ def finalize_options(self): 
providers_cuda_or_rocm = "onnxruntime_providers_" + ("rocm" if is_rocm else "cuda") providers_tensorrt_or_migraphx = "onnxruntime_providers_" + ("migraphx" if is_migraphx else "tensorrt") +providers_nv_tensorrt_rtx = "onnxruntime_providers_nv_tensorrt_rtx" providers_openvino = "onnxruntime_providers_openvino" providers_cann = "onnxruntime_providers_cann" providers_qnn = "onnxruntime_providers_qnn" @@ -316,6 +330,7 @@ def finalize_options(self): elif platform.system() == "Windows": providers_cuda_or_rocm = providers_cuda_or_rocm + ".dll" providers_tensorrt_or_migraphx = providers_tensorrt_or_migraphx + ".dll" + providers_nv_tensorrt_rtx = providers_nv_tensorrt_rtx + ".dll" providers_openvino = providers_openvino + ".dll" providers_cann = providers_cann + ".dll" providers_qnn = providers_qnn + ".dll" @@ -384,6 +399,7 @@ def finalize_options(self): "libiomp5md.dll", providers_cuda_or_rocm, providers_tensorrt_or_migraphx, + providers_nv_tensorrt_rtx, providers_cann, "onnxruntime.dll", ] @@ -391,6 +407,7 @@ def finalize_options(self): libs.extend(["onnxruntime_providers_shared.dll"]) libs.extend(["onnxruntime_providers_dnnl.dll"]) libs.extend(["onnxruntime_providers_tensorrt.dll"]) + libs.extend(["onnxruntime_providers_nv_tensorrt_rtx.dll"]) libs.extend(["onnxruntime_providers_openvino.dll"]) libs.extend(["onnxruntime_providers_cuda.dll"]) libs.extend(["onnxruntime_providers_vitisai.dll"])