diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 62210d65848d1..3a79de5fb59ce 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -15,7 +15,6 @@
 #include "core/framework/ort_value.h"
 #include "nv_execution_provider.h"
 #include "nv_execution_provider_utils.h"
-#include "nv_execution_provider_custom_ops.h"
 #include "nv_allocator.h"
 #include "nv_data_transfer.h"
 #include "onnx_ctx_model_helper.h"
@@ -752,29 +751,6 @@ NvExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId devi
   }
 }
 
-NvExecutionProvider::PerThreadContext::~PerThreadContext() {
-  trt_context_map_.clear();
-}
-
-void NvExecutionProvider::PerThreadContext::ResetTensorRTContext(std::string fused_node) {
-  auto it = trt_context_map_.find(fused_node);
-  if (it != trt_context_map_.end()) {
-    trt_context_map_[fused_node].reset();
-  }
-}
-
-bool NvExecutionProvider::PerThreadContext::UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context) {
-  if (!context) {
-    context = tensorrt_ptr::unique_pointer_exec_ctx();
-  }
-  trt_context_map_[fused_node] = std::move(context);
-
-  if (trt_context_map_[fused_node]) {
-    return true;
-  }
-  return false;
-}
-
 void NvExecutionProvider::PerThreadContext::DeleteCapturedGraph(CudaGraphAnnotation_t cuda_graph_annotation_id) {
   graph_id_to_run_count_.erase(cuda_graph_annotation_id);
   cuda_graph_.Reset();
@@ -855,24 +831,6 @@ void NvExecutionProvider::PerThreadContext::IncrementRegularRunCountBeforeGraphC
   graph_id_to_run_count_[cuda_graph_annotation_id]++;
 }
 
-bool NvExecutionProvider::PerThreadContext::IsTensorRTContextInMap(std::string fused_node) {
-  auto it = trt_context_map_.find(fused_node);
-  if (it != trt_context_map_.end()) {
-    return true;
-  }
-  return false;
-}
-
-nvinfer1::IExecutionContext& NvExecutionProvider::PerThreadContext::GetTensorRTContext(std::string fused_node) {
-  auto it = trt_context_map_.find(fused_node);
-  if (it != trt_context_map_.end()) {
-    return *(it->second.get());  // dereference shared pointer
-  }
-  auto context = tensorrt_ptr::unique_pointer_exec_ctx();
-  trt_context_map_[fused_node] = std::move(context);
-  return *(trt_context_map_[fused_node].get());  // dereference shared pointer
-}
-
 void NvExecutionProvider::ReleasePerThreadContext() const {
   const auto& per_thread_context_cache = PerThreadContextCache();
 
@@ -1016,13 +974,6 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
   ep_context_file_path_ = info.ep_context_file_path;
   ep_context_embed_mode_ = info.ep_context_embed_mode;
   enable_engine_cache_for_ep_context_model();
-  cache_prefix_ = info.engine_cache_prefix;
-  // use a more global cache if given
-  engine_decryption_enable_ = info.engine_decryption_enable;
-  if (engine_decryption_enable_) {
-    engine_decryption_lib_path_ = info.engine_decryption_lib_path;
-  }
-  force_sequential_engine_build_ = info.force_sequential_engine_build;
   sparsity_enable_ = info.sparsity_enable;
   auxiliary_streams_ = info.auxiliary_streams;
   profile_min_shapes = info.profile_min_shapes;
@@ -1120,20 +1071,6 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
     cache_path_ = GetPathOrParentPathOfCtxModel(ep_context_file_path_).append(cache_path_).string();
   }
 
-  if (engine_decryption_enable_) {
-    LIBTYPE handle = OPENLIB(engine_decryption_lib_path_.c_str());
-    if (handle == nullptr) {
-      ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
-                                         "NvTensorRTRTX EP could not open shared library from " + engine_decryption_lib_path_));
-    }
-    engine_decryption_ = (int (*)(const char*, char*, size_t*))LIBFUNC(handle, "decrypt");
-    engine_encryption_ = (int (*)(const char*, char*, size_t))LIBFUNC(handle, "encrypt");
-    if (engine_decryption_ == nullptr) {
-      ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
-                                         "NvTensorRTRTX EP could not find decryption function in shared library from " + engine_decryption_lib_path_));
-    }
-  }
-
   // cuda graph:
   // cudaStreamSynchronize() is not allowed in cuda graph capture.
   //
@@ -1163,16 +1100,12 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
                         << ", nv_dump_subgraphs: " << dump_subgraphs_
                         << ", nv_weight_stripped_engine_enable: " << weight_stripped_engine_enable_
                         << ", nv_onnx_model_folder_path: " << onnx_model_folder_path_
-                        << ", nv_engine_decryption_enable: " << engine_decryption_enable_
-                        << ", nv_engine_decryption_lib_path: " << engine_decryption_lib_path_
-                        << ", nv_force_sequential_engine_build: " << force_sequential_engine_build_
                         << ", nv_sparsity_enable: " << sparsity_enable_
                         << ", nv_auxiliary_streams: " << auxiliary_streams_
                         << ", enable_cuda_graph: " << cuda_graph_enable_
                         << ", nv_dump_ep_context_model: " << dump_ep_context_model_
                         << ", nv_ep_context_file_path: " << ep_context_file_path_
                         << ", nv_ep_context_embed_mode: " << ep_context_embed_mode_
-                        << ", nv_cache_prefix: " << cache_prefix_
                        << ", nv_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_
                        << ", nv_onnx_external_bytestream_size_: " << onnx_external_data_bytestream_size_
                        << ", nv_use_external_data_initializer_: " << use_external_data_initializer_
@@ -1199,7 +1132,6 @@ NvExecutionProvider::~NvExecutionProvider() {
   if (!external_stream_ && stream_ != nullptr) {
     ORT_IGNORE_RETURN_VALUE(CUDA_CALL(cudaStreamDestroy(stream_)));
   }
-  ReleaseTensorRTCustomOpDomainList(info_.custom_op_domain_list);
 
   if (alloc_ != nullptr) {
     // This code is same as OrtApis::ReleaseAllocator defined in allocator_adapters.cc.
@@ -1326,13 +1258,6 @@ nvinfer1::IBuilder* NvExecutionProvider::GetBuilder(TensorrtLogger& trt_logger)
   return builder_.get();
 }
 
-void NvExecutionProvider::GetCustomOpDomainList(std::vector& custom_op_domain_list) const {
-  auto status = CreateTensorRTCustomOpDomainList(custom_op_domain_list, info_.extra_plugin_lib_paths);
-  if (status != Status::OK()) {
-    LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Failed to get TRT plugins from TRT plugin registration.";
-  }
-}
-
 // Check the graph is the subgraph of control flow op
 bool NvExecutionProvider::IsSubGraphOfControlFlowOp(const GraphViewer& graph) const {
   if (graph.IsSubgraph()) {
@@ -2827,16 +2752,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
 
   if (dump_ep_context_model_) {
     // "ep_cache_context" node attribute should be a relative path to context model directory
-    std::string cache_path = "";
-    // Customize cache prefix if assigned
-    if (!cache_prefix_.empty()) {
-      // Generate cache suffix in case user would like to customize cache prefix
-      cache_path = GetCachePath(cache_path_, cache_prefix_) + fused_node.Name() + ".engine";
-      ;
-    } else {
-      cache_path = GetCachePath(cache_path_, fused_node.Name()) + ".engine";
-      ;
-    }
+    std::string cache_path = GetCachePath(cache_path_, fused_node.Name()) + ".engine";
     // NV TRT EP per default generates hardware compatible engines for any RTX device with compute capability > 80
     std::string compute_capability_hw_compat = "80+";
     if (!ep_context_model_) {
@@ -2931,9 +2847,8 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
           input_shape_ranges_[context->node_name], &tensorrt_mu_,
          engine_cache_enable_, cache_path_,
          runtime_.get(), profiles_[context->node_name],
-          engine_decryption_enable_, engine_decryption_, engine_encryption_,
          detailed_build_log_, sparsity_enable_,
-          auxiliary_streams_, cuda_graph_enable_, is_dynamic_shape_context, cache_prefix_};
+          auxiliary_streams_, cuda_graph_enable_, is_dynamic_shape_context};
     *state = p.release();
     return 0;
   };
@@ -3483,8 +3398,8 @@ void NvExecutionProvider::RegisterStreamHandlers(IStreamCommandHandleRegistry& s
                               true /* release_cpu_buffer_on_cuda_stream */,
                               stream_,
                               external_stream_ /* use_existing_stream */,
-                              external_cudnn_handle_,
-                              external_cublas_handle_,
+                              nullptr,
+                              nullptr,
                               {});
 }
 
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
index bb8f687db094f..4fb56e2b90d30 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
@@ -228,16 +228,11 @@ struct TensorrtFuncState {
   std::string engine_cache_path;
   nvinfer1::IRuntime* runtime = nullptr;
   std::vector profiles;
-  bool engine_decryption_enable = false;
-  int (*engine_decryption)(const char*, char*, size_t*) = nullptr;
-  int (*engine_encryption)(const char*, char*, size_t) = nullptr;
   bool detailed_build_log = false;
   bool sparsity_enable = false;
   int auxiliary_streams = -1;
   bool cuda_graph_enable = 0;
   bool is_dynamic_shape = false;
-  std::string cache_prefix;
-  std::string cache_suffix;
   // runtime parameters
   std::vector> scratch_buffers;
   std::vector input_tensors;
@@ -289,14 +284,6 @@ class NvExecutionProvider : public IExecutionProvider {
   // explicit NvExecutionProvider(const ProviderOptions& provider_options_map, const ConfigOptions* config_options);
   virtual ~NvExecutionProvider();
 
-  cublasHandle_t PerThreadDefaultCublasHandle() {
-    return GetPerThreadContext().CublasHandle();
-  }
-
-  cudnnHandle_t PerThreadDefaultCudnnHandle() {
-    return GetPerThreadContext().CudnnHandle();
-  }
-
   virtual std::shared_ptr GetKernelRegistry() const override;
   std::unique_ptr GetDataTransfer() const override;
 
@@ -306,7 +293,7 @@
                                                  const GraphOptimizerRegistry& graph_optimizer_registry,
                                                  IResourceAccountant* /* resource_accountant */) const override;
 
-  int GetDeviceId() const { return device_id_; }
+  int GetDeviceId() const override { return device_id_; }
 
   Status Sync() const;
   common::Status Compile(const std::vector& fused_nodes_and_graphs,
@@ -321,8 +308,6 @@
   void RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry,
                               AllocatorMap& allocators) const override;
 
-  void GetCustomOpDomainList(std::vector& custom_op_domain_list) const override;
-
   OrtDevice GetOrtDeviceByMemType(OrtMemType mem_type) const override;
 
   std::vector CreatePreferredAllocators() override;
@@ -353,7 +338,6 @@ class NvExecutionProvider : public IExecutionProvider {
   size_t min_subgraph_size_ = 1;
   size_t max_workspace_size_ = 0;
   size_t max_shared_mem_size_ = 0;
-  bool force_sequential_engine_build_ = false;
   bool dump_subgraphs_ = false;
   bool engine_cache_enable_ = false;
   bool weight_stripped_engine_enable_ = false;
@@ -366,21 +350,17 @@ class NvExecutionProvider : public IExecutionProvider {
   size_t onnx_external_data_bytestream_size_ = 0;
   bool sparsity_enable_ = false;
   int auxiliary_streams_ = -1;
-  std::string cache_path_, engine_decryption_lib_path_;
+  std::string cache_path_;
   std::unique_ptr runtime_ = nullptr;
   std::mutex tensorrt_mu_;
   int device_id_;
   std::string compute_capability_;
   size_t max_ctx_mem_size_ = 0;
   mutable char model_path_[4096] = {};  // Reserved for max path length
-  bool engine_decryption_enable_ = false;
-  int (*engine_decryption_)(const char*, char*, size_t*) = nullptr;
-  int (*engine_encryption_)(const char*, char*, size_t) = nullptr;
   bool detailed_build_log_ = false;
   bool cuda_graph_enable_ = false;
   bool multi_profile_enable_ = false;
   std::filesystem::path runtime_cache_;
-  std::string cache_prefix_;
   std::string op_types_to_exclude_;
   int nv_profile_index_ = 0;
   std::unique_ptr ep_context_model_;
@@ -422,10 +402,6 @@ class NvExecutionProvider : public IExecutionProvider {
   std::unordered_map> profiles_;
   std::unordered_map dds_output_allocator_maps_;
 
-  // for external stream, we need to create its cudnn/cublass handle before cuda EP enable cuda graph capture
-  cudnnHandle_t external_cudnn_handle_ = nullptr;
-  cublasHandle_t external_cublas_handle_ = nullptr;
-
   // Call cudaStreamSynchronize() after TRT enqueueV3()
   mutable bool sync_stream_after_enqueue_ = true;
 
@@ -436,20 +412,7 @@ class NvExecutionProvider : public IExecutionProvider {
   class PerThreadContext final {
    public:
     PerThreadContext(OrtDevice::DeviceId device_id, bool has_user_compute_stream, cudaStream_t stream);
-    ~PerThreadContext();
-
-    cublasHandle_t CublasHandle() const {
-      return external_cublas_handle_;
-    }
-
-    cudnnHandle_t CudnnHandle() const {
-      return external_cudnn_handle_;
-    }
-
-    bool IsTensorRTContextInMap(std::string fused_node);
-    nvinfer1::IExecutionContext& GetTensorRTContext(std::string fused_node);
-    bool UpdateTensorRTContext(std::string fused_node, tensorrt_ptr::unique_pointer_exec_ctx context);
-    void ResetTensorRTContext(std::string fused_node);
+    ~PerThreadContext() = default;
 
     // CUDA Graph management
     void SetCudaGraphStream(cudaStream_t stream) { cuda_graph_.SetStream(stream); }
@@ -467,23 +430,6 @@ class NvExecutionProvider : public IExecutionProvider {
     void DeleteCapturedGraph(CudaGraphAnnotation_t cuda_graph_annotation_id);
 
    private:
-    cudnnHandle_t external_cudnn_handle_ = nullptr;
-    cublasHandle_t external_cublas_handle_ = nullptr;
-
-    // Maintaining execution context on a per thread basis is suggested by TRT doc.
-    // Also, for enqueueV2() in execution context, to perform inference concurrently in multiple streams, use one execution context per stream.
-    // ORT multi-streams feature uses one stream for one thread, therefore maintaining execution context on a per thread basis is necessary for TRT EP,
-    // otherwise it may result in undefined behavior or synchronization issues.
-    //
-    // See more details here:
-    // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#threading
-    // https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_execution_context.html#a63cd95430852038ce864e17c670e0b36
-    std::unordered_map trt_context_map_;
-
-    // The profile shape ranges for the engine that the execution context maintained by the PerThreadContext is built with.
-    // TRT EP needs this info to determine whether to rebuild the execution context.
-    std::unordered_map input_shape_ranges_;
-
     // Cuda graph with multi threads will be supported in the future, so cuda_graph_ is put under PerThreadContext.
     // ORT TRT only supports CUDA graph when whole model is supported by TRT, so simply maintaining a CUDAGraph instance is enough (no need to maintain one CUDAGraph instance per TRT subgraph)
     CUDAGraph cuda_graph_;
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc
deleted file mode 100644
index c8df7c9437adf..0000000000000
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// Licensed under the MIT License.
-
-#include
-
-#include "core/framework/provider_options.h"
-#include "nv_execution_provider_custom_ops.h"
-#include "nv_execution_provider.h"
-
-// The filename extension for a shared library is different per platform
-#ifdef _WIN32
-#define LIBRARY_PREFIX
-#define LIBRARY_EXTENSION ORT_TSTR(".dll")
-#elif defined(__APPLE__)
-#define LIBRARY_PREFIX "lib"
-#define LIBRARY_EXTENSION ".dylib"
-#else
-#define LIBRARY_PREFIX "lib"
-#define LIBRARY_EXTENSION ".so"
-#endif
-
-namespace onnxruntime {
-extern TensorrtLogger& GetTensorrtLogger(bool verbose);
-
-/*
- * Create custom op domain list for TRT plugins.
- *
- * Here, we collect all registered TRT plugins from TRT registry and create custom ops with "trt.plugins" domain.
- * Additionally, if users specify extra plugin libraries, TRT EP will load them at runtime which will register those
- * plugins to TRT plugin registry and later TRT EP can get them as well.
- *
- * There are several TRT plugins registered as onnx schema op through contrib op with ONNX domain in the past,
- * for example, EfficientNMS_TRT, MultilevelCropAndResize_TRT, PyramidROIAlign_TRT and DisentangledAttention_TRT.
- * In order not to break the old models using those TRT plugins which were registered with ONNX domain and maintain
- * backward compatible, we need to keep those legacy TRT plugins registered with ONNX domain with contrib ops.
- *
- * Note: Current TRT plugin doesn't have APIs to get number of inputs/outputs of the plugin.
- * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation.
- */
-common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) {
-  static std::unique_ptr custom_op_domain = std::make_unique();
-  static std::vector> created_custom_op_list;
-  static std::mutex mutex;
-  std::lock_guard lock(mutex);
-  if (custom_op_domain->domain_ != "" && custom_op_domain->custom_ops_.size() > 0) {
-    domain_list.push_back(custom_op_domain.get());
-    return Status::OK();
-  }
-
-  // Load any extra TRT plugin library if any.
-  // When the TRT plugin library is loaded, the global static object is created and the plugin is registered to TRT registry.
-  // This is done through macro, for example, REGISTER_TENSORRT_PLUGIN(VisionTransformerPluginCreator).
-  // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n"
-  static bool is_loaded = false;
-  if (!extra_plugin_lib_paths.empty() && !is_loaded) {
-    std::stringstream extra_plugin_libs(extra_plugin_lib_paths);
-    std::string lib;
-    while (std::getline(extra_plugin_libs, lib, ';')) {
-      auto status = LoadDynamicLibrary(ToPathString(lib));
-      if (status == Status::OK()) {
-        LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Successfully load " << lib;
-      } else {
-        LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP]" << status.ToString();
-      }
-    }
-    is_loaded = true;
-  }
-
-  try {
-    // Get all registered TRT plugins from registry
-    LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Getting all registered TRT plugins from TRT plugin registry ...";
-    TensorrtLogger trt_logger = GetTensorrtLogger(false);
-    void* library_handle = nullptr;
-    const auto& env = onnxruntime::GetDefaultEnv();
-    auto full_path = env.GetRuntimePath() +
-                     PathString(LIBRARY_PREFIX ORT_TSTR("nvinfer_plugin") LIBRARY_EXTENSION);
-    ORT_THROW_IF_ERROR(env.LoadDynamicLibrary(full_path, false, &library_handle));
-
-    bool (*dyn_initLibNvInferPlugins)(void* logger, char const* libNamespace);
-    ORT_THROW_IF_ERROR(env.GetSymbolFromLibrary(library_handle, "initLibNvInferPlugins", (void**)&dyn_initLibNvInferPlugins));
-    dyn_initLibNvInferPlugins(&trt_logger, "");
-    LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Default plugins successfully loaded.";
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4996)  // Ignore warning C4996: 'nvinfer1::*' was declared deprecated
-#endif
-  } catch (const std::exception&) {
-    LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Default plugin library is not on the path and is therefore ignored";
-  }
-  try {
-    int num_plugin_creator = 0;
-    auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator);
-    std::unordered_set registered_plugin_names;
-
-    for (int i = 0; i < num_plugin_creator; i++) {
-      auto plugin_creator = plugin_creators[i];
-      std::string plugin_name(plugin_creator->getPluginName());
-      LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion();
-
-      // plugin has different versions and we only register once
-      if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) {
-        continue;
-      }
-
-      created_custom_op_list.push_back(std::make_unique(onnxruntime::kNvTensorRTRTXExecutionProvider, nullptr));  // Make sure TensorRTCustomOp object won't be cleaned up
-      created_custom_op_list.back().get()->SetName(plugin_creator->getPluginName());
-      custom_op_domain->custom_ops_.push_back(created_custom_op_list.back().get());
-      registered_plugin_names.insert(plugin_name);
-    }
-
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#endif
-
-    custom_op_domain->domain_ = "trt.plugins";
-    domain_list.push_back(custom_op_domain.get());
-  } catch (const std::exception&) {
-    LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins";
-  }
-  return Status::OK();
-}
-
-void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) {
-  if (domain != nullptr) {
-    for (auto ptr : domain->custom_ops_) {
-      if (ptr != nullptr) {
-        delete ptr;
-      }
-    }
-    delete domain;
-  }
-}
-
-void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list) {
-  for (auto ptr : custom_op_domain_list) {
-    ReleaseTensorRTCustomOpDomain(ptr);
-  }
-}
-
-}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.h
deleted file mode 100644
index 81c0d49239ec8..0000000000000
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.h
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma once
-#define ORT_API_MANUAL_INIT
-#include "core/session/onnxruntime_c_api.h"
-#include "core/session/onnxruntime_cxx_api.h"
-#include "core/providers/shared_library/provider_api.h"
-#include "nv_execution_provider_info.h"
-
-using namespace onnxruntime;
-
-namespace onnxruntime {
-
-common::Status LoadDynamicLibrary(onnxruntime::PathString library_name);
-common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list,
-                                                const std::string extra_plugin_lib_paths);
-common::Status CreateTensorRTCustomOpDomainList(NvExecutionProviderInfo& info);
-void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain);
-void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list);
-
-struct TensorRTCustomKernel {
-  TensorRTCustomKernel(const OrtKernelInfo* /*info*/, void* compute_stream)
-      : compute_stream_(compute_stream) {
-  }
-
-  void Compute(OrtKernelContext* /*context*/) {
-    // The implementation is in TensorRT plugin. No need to implement it here.
-  };
-
- private:
-  void* compute_stream_;
-};
-
-struct TensorRTCustomOp : Ort::CustomOpBase {
-  explicit TensorRTCustomOp(const char* provider, void* compute_stream) : provider_(provider),
-                                                                          compute_stream_(compute_stream) {
-  }
-
-  void* CreateKernel(const OrtApi& /* api */, const OrtKernelInfo* info) const {
-    return new TensorRTCustomKernel(info, compute_stream_);
-  };
-
-  const char* GetName() const { return name_; };
-
-  void SetName(const char* name) { name_ = name; };
-
-  const char* GetExecutionProviderType() const { return provider_; };
-
-  size_t GetInputTypeCount() const { return num_inputs_; };
-
-  void SetInputTypeCount(size_t num) { num_inputs_ = num; };
-
-  ONNXTensorElementDataType GetInputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; };
-
-  OrtCustomOpInputOutputCharacteristic GetInputCharacteristic(size_t) const {
-    return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC;
-  };
-
-  size_t GetOutputTypeCount() const { return num_outputs_; };
-
-  void SetOutputTypeCount(size_t num) { num_outputs_ = num; };
-
-  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; };
-
-  OrtCustomOpInputOutputCharacteristic GetOutputCharacteristic(size_t) const {
-    return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_VARIADIC;
-  };
-
-  bool GetVariadicInputHomogeneity() const {
-    return false;  // heterogenous
-  }
-
-  bool GetVariadicOutputHomogeneity() const {
-    return false;  // heterogeneous
-  }
-
- private:
-  const char* provider_{onnxruntime::kNvTensorRTRTXExecutionProvider};
-  void* compute_stream_;
-  const char* name_;
-  size_t num_inputs_ = 1;   // set to 1 to match with default min_arity for variadic input
-  size_t num_outputs_ = 1;  // set to 1 to match with default min_arity for variadic output
-};
-}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
index f25718114891b..8d726d715be04 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
@@ -84,7 +84,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
     embed_mode = 0;
   }
 
-  if (0 <= embed_mode || embed_mode < 2) {
+  if (0 <= embed_mode && embed_mode < 2) {
     info.ep_context_embed_mode = embed_mode;
   } else {
     ORT_THROW("Invalid ", kOrtSessionOptionEpContextEmbedMode, " must 0 or 1");
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
index 372e8196f38c2..4c5b3a8b7fbec 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
@@ -26,7 +26,6 @@ struct NvExecutionProviderInfo {
   size_t max_workspace_size{0};
   size_t max_shared_mem_size{0};
   bool dump_subgraphs{false};
-  std::string engine_cache_path{""};
   bool weight_stripped_engine_enable{false};
   std::string onnx_model_folder_path{""};
   const void* onnx_bytestream{nullptr};
@@ -34,14 +33,10 @@ struct NvExecutionProviderInfo {
   bool use_external_data_initializer{false};
   const void* external_data_bytestream{nullptr};
   size_t external_data_bytestream_size{0};
-  bool engine_decryption_enable{false};
-  std::string engine_decryption_lib_path{""};
-  bool force_sequential_engine_build{false};
   std::string runtime_cache_path{""};
   bool detailed_build_log{false};
   bool sparsity_enable{false};
   int auxiliary_streams{-1};
-  std::string extra_plugin_lib_paths{""};
   std::string profile_min_shapes{""};
   std::string profile_max_shapes{""};
   std::string profile_opt_shapes{""};
@@ -50,12 +45,10 @@ struct NvExecutionProviderInfo {
   bool dump_ep_context_model{false};
   std::string ep_context_file_path{""};
   int ep_context_embed_mode{0};
-  std::string engine_cache_prefix{""};
   std::string op_types_to_exclude{""};
 
   static NvExecutionProviderInfo FromProviderOptions(const ProviderOptions& options,
                                                      const ConfigOptions& session_options);
   static ProviderOptions ToProviderOptions(const NvExecutionProviderInfo& info);
-  std::vector custom_op_domain_list;
 };
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_includes.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_includes.h
index a4e3777008560..b4502e5144cc7 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_includes.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_includes.h
@@ -11,7 +11,6 @@
 #endif
 
 #include
-#include
 #include
 #include
 
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc
index c3fbccef84883..755d2a55a0450 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc
@@ -11,7 +11,6 @@
 #include "nv_allocator.h"
 #include "core/framework/provider_options.h"
 #include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"
-#include "core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.h"
 #include
 #include "core/providers/cuda/shared_inc/cuda_call.h"
 #include "core/providers/cuda/cuda_stream_handle.h"
@@ -31,19 +30,6 @@ struct ProviderInfo_Nv_Impl final : ProviderInfo_Nv {
     }
     return nullptr;
   }
-
-  OrtStatus* GetTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) override {
-    common::Status status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths);
-    if (!status.IsOK()) {
-      return CreateStatus(ORT_FAIL, "[NvTensorRTRTX EP] Can't create custom ops for TRT plugins.");
-    }
-    return nullptr;
-  }
-
-  OrtStatus* ReleaseCustomOpDomainList(std::vector& domain_list) override {
-    ReleaseTensorRTCustomOpDomainList(domain_list);
-    return nullptr;
-  }
 } g_info;
 
 struct NvProviderFactory : IExecutionProviderFactory {
@@ -52,7 +38,7 @@ struct NvProviderFactory : IExecutionProviderFactory {
   std::unique_ptr CreateProvider() override;
 
   std::unique_ptr CreateProvider(const OrtSessionOptions& session_options,
-                                 const OrtLogger& session_logger);
+                                 const OrtLogger& session_logger) override;
 
 private:
  NvExecutionProviderInfo info_;
@@ -100,7 +86,7 @@ struct Nv_Provider : Provider {
     return std::make_shared(info);
   }
 
-  std::shared_ptr CreateExecutionProviderFactory(const void* param) {
+  std::shared_ptr CreateExecutionProviderFactory(const void* param) override {
     if (param == nullptr) {
       LOGS_DEFAULT(ERROR) << "[NvTensorRTRTX EP] Passed NULL options to CreateExecutionProviderFactory()";
       return nullptr;
@@ -757,7 +743,7 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory {
 private:
  const OrtApi& ort_api;
  const OrtEpApi& ep_api;
- const OrtLogger& default_logger;
+ [[maybe_unused]] const OrtLogger& default_logger;
  const std::string ep_name{kNvTensorRTRTXExecutionProvider};
  const std::string vendor{"NVIDIA"};
 
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.h
index 5672c5dda632e..59573217c9a37 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.h
@@ -8,8 +8,6 @@ namespace onnxruntime {
 struct ProviderInfo_Nv {
   virtual OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) = 0;
 
-  virtual OrtStatus* GetTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) = 0;
-  virtual OrtStatus* ReleaseCustomOpDomainList(std::vector& domain_list) = 0;
 
 protected:
  ~ProviderInfo_Nv() = default;  // Can only be destroyed through a subclass instance
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index c548f3df4fb27..bf662f3393f47 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -562,38 +562,6 @@ void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOpti
 }
 #endif
 
-#if defined(USE_NV) || defined(USE_NV_PROVIDER_INTERFACE)
-void RegisterNvTensorRTRtxPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) {
-  if (auto* nv_tensorrt_rtx_provider_info = TryGetProviderInfo_Nv()) {
-    auto is_already_in_domains = [&](std::string& domain_name, std::vector& domains) {
-      for (auto ptr : domains) {
-        if (domain_name == ptr->domain_) {
-          return true;
-        }
-      }
-      return false;
-    };
-
-    std::string extra_plugin_lib_paths = "";
-    const auto it = options.find("extra_plugin_lib_paths");
-    if (it != options.end()) {
-      extra_plugin_lib_paths = it->second;
-    }
-    std::vector custom_op_domains;
-    nv_tensorrt_rtx_provider_info->GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths);
-    for (auto ptr : custom_op_domains) {
-      if (!is_already_in_domains(ptr->domain_, so.custom_op_domains_)) {
-        so.custom_op_domains_.push_back(ptr);
-      } else {
-        LOGS_DEFAULT(WARNING) << "The custom op domain name " << ptr->domain_ << " is already in session option.";
-      }
-    }
-  } else {
-    ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported.");
-  }
-}
-#endif
-
 /**
  * Creates an IExecutionProviderFactory instance of the specified type.
  * @param session_options The session options.
@@ -1683,12 +1651,6 @@ void addGlobalMethods(py::module& m) {
       "Register TensorRT plugins as custom ops.");
 #endif
 
-#if defined(USE_NV) || defined(USE_NV_PROVIDER_INTERFACE)
-  m.def(
-      "register_nv_tensorrt_rtx_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterNvTensorRTRtxPluginsAsCustomOps(so, options); },
-      "Register NV TensorRT RTX plugins as custom ops.");
-#endif
-
 #ifdef ENABLE_ATEN
   m.def("register_aten_op_executor",
         [](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void {