diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc index 42f8f9fe8a62c..25c130a849793 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc @@ -281,7 +281,7 @@ bool ApplyProfileShapesFromProviderOptions(std::vector>>& profile_opt_shapes, ShapeRangesMap& input_explicit_shape_ranges) { if (trt_profiles.size() == 0) { - LOGS_DEFAULT(WARNING) << "[Nv EP] Number of optimization profiles should be greater than 0, but it's 0."; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Number of optimization profiles should be greater than 0, but it's 0."; return false; } @@ -295,8 +295,8 @@ bool ApplyProfileShapesFromProviderOptions(std::vectorgetDimensions(); @@ -309,7 +309,7 @@ bool ApplyProfileShapesFromProviderOptions(std::vector(profile_min_shapes[input_name][i].size()); std::vector shapes_min(shape_size), shapes_opt(shape_size), shapes_max(shape_size); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] shape size of this shape tensor is " << shape_size; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] shape size of this shape tensor is " << shape_size; for (int j = 0; j < shape_size; j++) { auto min_value = profile_min_shapes[input_name][i][j]; @@ -318,9 +318,9 @@ bool ApplyProfileShapesFromProviderOptions(std::vector(min_value); shapes_max[j] = static_cast(max_value); shapes_opt[j] = static_cast(opt_value); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] shapes_min.d[" << j << "] is " << shapes_min[j]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] shapes_max.d[" << j << "] is " << shapes_max[j]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] shapes_opt.d[" << j << "] is " << shapes_opt[j]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] shapes_min.d[" << j << "] is " << shapes_min[j]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] shapes_max.d[" << j << "] is " << shapes_max[j]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] 
shapes_opt.d[" << j << "] is " << shapes_opt[j]; if (input_explicit_shape_ranges[input_name].find(j) == input_explicit_shape_ranges[input_name].end()) { std::vector> profile_vector(trt_profiles.size()); @@ -342,7 +342,7 @@ bool ApplyProfileShapesFromProviderOptions(std::vector(min_value); dims_max.d[j] = static_cast(max_value); dims_opt.d[j] = static_cast(opt_value); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] dims_min.d[" << j << "] is " << dims_min.d[j]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] dims_max.d[" << j << "] is " << dims_max.d[j]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] dims_opt.d[" << j << "] is " << dims_opt.d[j]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] dims_min.d[" << j << "] is " << dims_min.d[j]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] dims_max.d[" << j << "] is " << dims_max.d[j]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] dims_opt.d[" << j << "] is " << dims_opt.d[j]; if (input_explicit_shape_ranges[input_name].find(j) == input_explicit_shape_ranges[input_name].end()) { std::vector> profile_vector(trt_profiles.size()); @@ -933,7 +933,7 @@ NvExecutionProvider::PerThreadContext::~PerThreadContext() { bool NvExecutionProvider::PerThreadContext::CompareProfileShapes(std::string fused_node, ShapeRangesMap& shape_ranges) { if (shape_ranges.size() > 0) { if (input_shape_ranges_[fused_node] != shape_ranges) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] The shape ranges maintained by the PerThreadContext is different from the shape ranges maintained by TRT EP. \ + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] The shape ranges maintained by the PerThreadContext is different from the shape ranges maintained by TRT EP. 
\ This means the engine is updated and will need to update the execution context as well."; return true; } @@ -1068,53 +1068,95 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info) } }; - // Get environment variables - if (info.has_trt_options) { - max_partition_iterations_ = info.max_partition_iterations; - min_subgraph_size_ = info.min_subgraph_size; - max_workspace_size_ = info.max_workspace_size; - dump_subgraphs_ = info.dump_subgraphs; - weight_stripped_engine_enable_ = info.weight_stripped_engine_enable; - onnx_model_folder_path_ = info.onnx_model_folder_path; - onnx_model_bytestream_ = info.onnx_bytestream; - onnx_model_bytestream_size_ = info.onnx_bytestream_size; - if ((onnx_model_bytestream_ != nullptr && onnx_model_bytestream_size_ == 0) || - (onnx_model_bytestream_ == nullptr && onnx_model_bytestream_size_ != 0)) { - ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "When providing either 'trt_onnx_bytestream_size' or " - "'trt_onnx_bytestream' both have to be provided")); - } - detailed_build_log_ = info.detailed_build_log; - dump_ep_context_model_ = info.dump_ep_context_model; - ep_context_file_path_ = info.ep_context_file_path; - ep_context_embed_mode_ = info.ep_context_embed_mode; - enable_engine_cache_for_ep_context_model(); - cache_prefix_ = info.engine_cache_prefix; - // use a more global cache if given - engine_decryption_enable_ = info.engine_decryption_enable; - if (engine_decryption_enable_) { - engine_decryption_lib_path_ = info.engine_decryption_lib_path; - } - force_sequential_engine_build_ = info.force_sequential_engine_build; - context_memory_sharing_enable_ = info.context_memory_sharing_enable; - sparsity_enable_ = info.sparsity_enable; - auxiliary_streams_ = info.auxiliary_streams; - profile_min_shapes = info.profile_min_shapes; - profile_max_shapes = info.profile_max_shapes; - profile_opt_shapes = info.profile_opt_shapes; - cuda_graph_enable_ = info.cuda_graph_enable; - op_types_to_exclude_ = 
info.op_types_to_exclude; - } else { - LOGS_DEFAULT(INFO) << "[Nv EP] Options were not specified"; + max_partition_iterations_ = info.max_partition_iterations; + min_subgraph_size_ = info.min_subgraph_size; + max_workspace_size_ = info.max_workspace_size; + dump_subgraphs_ = info.dump_subgraphs; + weight_stripped_engine_enable_ = info.weight_stripped_engine_enable; + onnx_model_folder_path_ = info.onnx_model_folder_path; + onnx_model_bytestream_ = info.onnx_bytestream; + onnx_model_bytestream_size_ = info.onnx_bytestream_size; + if ((onnx_model_bytestream_ != nullptr && onnx_model_bytestream_size_ == 0) || + (onnx_model_bytestream_ == nullptr && onnx_model_bytestream_size_ != 0)) { + ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "When providing either 'trt_onnx_bytestream_size' or " + "'trt_onnx_bytestream' both have to be provided")); + } + detailed_build_log_ = info.detailed_build_log; + dump_ep_context_model_ = info.dump_ep_context_model; + ep_context_file_path_ = info.ep_context_file_path; + ep_context_embed_mode_ = info.ep_context_embed_mode; + enable_engine_cache_for_ep_context_model(); + cache_prefix_ = info.engine_cache_prefix; + // use a more global cache if given + engine_decryption_enable_ = info.engine_decryption_enable; + if (engine_decryption_enable_) { + engine_decryption_lib_path_ = info.engine_decryption_lib_path; + } + force_sequential_engine_build_ = info.force_sequential_engine_build; + context_memory_sharing_enable_ = info.context_memory_sharing_enable; + sparsity_enable_ = info.sparsity_enable; + auxiliary_streams_ = info.auxiliary_streams; + profile_min_shapes = info.profile_min_shapes; + profile_max_shapes = info.profile_max_shapes; + profile_opt_shapes = info.profile_opt_shapes; + + /* + * Parse explicit min/max/opt profile shapes from provider options. + * + * The format of min/max/opt profile shapes is defined as below: + * "input1:dim1xdim2...,input2:dim1xdim2...,...,input1:dim3xdim4...,input2:dim3xdim4...,..." 
+ * + * (Note: if multiple shapes with same input name are specified, TRT EP will consider them as multiple profiles. + * Please refer to ParserProfileShapes() for more details) + * + */ + bool status = true; + if (status) { + status = ParseProfileShapes(profile_min_shapes, profile_min_shapes_); + if (!status) { + profile_min_shapes_.clear(); + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The format of provider option 'trt_profile_min_shapes' is wrong, please follow the format of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'"; + } } + if (status) { + status = ParseProfileShapes(profile_max_shapes, profile_max_shapes_); + if (!status) { + profile_max_shapes_.clear(); + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The format of provider option 'trt_profile_max_shapes' is wrong, please follow the format of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'"; + } + } + + if (status) { + status = ParseProfileShapes(profile_opt_shapes, profile_opt_shapes_); + if (!status) { + profile_opt_shapes_.clear(); + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The format of provider option 'trt_profile_opt_shapes' is wrong, please follow the format of 'input1:dim1xdimd2...,input2:dim1xdim2...,...'"; + } + } + + if (status) { + status = ValidateProfileShapes(profile_min_shapes_, profile_max_shapes_, profile_opt_shapes_); + if (!status) { + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Profile shapes validation failed. 
Make sure the provider options 'trt_profile_min_shapes', 'trt_profile_max_shapes' and 'trt_profile_opt_shapes' have same input name and number of profile."; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] TRT EP will implicitly create optimization profiles based on input tensor for you."; + profile_min_shapes_.clear(); + profile_max_shapes_.clear(); + profile_opt_shapes_.clear(); + } + } + + cuda_graph_enable_ = info.cuda_graph_enable; + op_types_to_exclude_ = info.op_types_to_exclude; + // Validate setting if (max_partition_iterations_ <= 0) { - // LOGS_DEFAULT(WARNING) << "[Nv EP] TensorRT option nv_max_partition_iterations must be a positive integer value. Set it to 1000"; + // LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] TensorRT option nv_max_partition_iterations must be a positive integer value. Set it to 1000"; max_partition_iterations_ = 1000; } if (min_subgraph_size_ <= 0) { - // LOGS_DEFAULT(WARNING) << "[Nv EP] TensorRT option nv_min_subgraph_size must be a positive integer value. Set it to 1"; + // LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] TensorRT option nv_min_subgraph_size must be a positive integer value. 
Set it to 1"; min_subgraph_size_ = 1; } @@ -1181,10 +1223,10 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info) trt_version_ = getInferLibVersion(); CUDA_CALL_THROW(cudaRuntimeGetVersion(&cuda_version_)); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] TensorRT version is " << trt_version_; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] CUDA version is " << cuda_version_; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] TensorRT version is " << trt_version_; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] CUDA version is " << cuda_version_; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Nv provider options: " + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Nv provider options: " << "device_id: " << device_id_ << ", nv_max_partition_iterations: " << max_partition_iterations_ << ", nv_min_subgraph_size: " << min_subgraph_size_ @@ -1311,15 +1353,9 @@ nvinfer1::IBuilder* NvExecutionProvider::GetBuilder(TensorrtLogger& trt_logger) } void NvExecutionProvider::GetCustomOpDomainList(std::vector& custom_op_domain_list) const { - std::string extra_plugin_lib_paths{""}; - if (info_.has_trt_options) { - if (!info_.extra_plugin_lib_paths.empty()) { - extra_plugin_lib_paths = info_.extra_plugin_lib_paths; - } - } - auto status = CreateTensorRTCustomOpDomainList(custom_op_domain_list, extra_plugin_lib_paths); + auto status = CreateTensorRTCustomOpDomainList(custom_op_domain_list, info_.extra_plugin_lib_paths); if (status != Status::OK()) { - LOGS_DEFAULT(WARNING) << "[Nv EP] Failed to get TRT plugins from TRT plugin registration."; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Failed to get TRT plugins from TRT plugin registration."; } } @@ -1498,7 +1534,7 @@ std::unique_ptr NvExecutionProvider::GetSubGraph(SubGraph_t gra auto meta_def = IndexedSubGraph_MetaDef::Create(); const std::string graph_type = graph.IsSubgraph() ? 
"subgraph" : "graph"; meta_def->name() = "TRTKernel_" + graph_type + "_" + graph.Name() + "_" + subgraph_id; - LOGS_DEFAULT(INFO) << "[Nv EP] TensorRT subgraph MetaDef name " + meta_def->name(); + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] TensorRT subgraph MetaDef name " + meta_def->name(); // Assign inputs and outputs to subgraph's meta_def for (const auto& input : inputs) { @@ -1619,7 +1655,7 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t // Only if the newly built graph has control flow op as well as it has parent node, // it needs to handle outer scope values before calling graph.Resolve(). if (has_control_flow_op && graph.ParentNode()) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Handle outer scope values for the subgraph " << graph_build.Name(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Handle outer scope values for the subgraph " << graph_build.Name(); BuildSubGraphContext(graph_build); SetGraphOuterScopeValuesAndInputs(graph_build, graph.GetGraph()); SetAllGraphInputs(graph_build); @@ -2005,9 +2041,9 @@ NvExecutionProvider::GetCapability(const GraphViewer& graph, } SubGraphCollection_t consolidated_supported_nodes_vector = {{nodes_vector, true}}; if (DetectTensorRTGraphCycles(consolidated_supported_nodes_vector, graph, model_hash, false)) { - LOGS_DEFAULT(INFO) << "[Nv EP] TensorRT nodes are not consolidated because graph will have cycles after consolidation"; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] TensorRT nodes are not consolidated because graph will have cycles after consolidation"; } else { - LOGS_DEFAULT(INFO) << "[Nv EP] TensorRT nodes are consolidated into one subgraph"; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] TensorRT nodes are consolidated into one subgraph"; supported_nodes_vector = consolidated_supported_nodes_vector; } } @@ -2072,7 +2108,7 @@ NvExecutionProvider::GetCapability(const GraphViewer& graph, } } } - LOGS_DEFAULT(INFO) << "[Nv EP] Whole graph will run on Nv execution provider"; + 
LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Whole graph will run on Nv execution provider"; // The context map is only used during EP compile time, release it to save memory space. subgraph_context_map_.clear(); @@ -2092,11 +2128,11 @@ NvExecutionProvider::GetCapability(const GraphViewer& graph, const size_t number_of_subgraphs = supported_nodes_vector.size(); if (number_of_trt_nodes == 0) { - LOGS_DEFAULT(WARNING) << "[Nv EP] No graph will run on Nv execution provider"; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] No graph will run on Nv execution provider"; } else if (number_of_trt_nodes == number_of_ort_nodes) { - LOGS_DEFAULT(INFO) << "[Nv EP] Whole graph will run on Nv execution provider"; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Whole graph will run on Nv execution provider"; } else { - LOGS_DEFAULT(INFO) << "[Nv EP] Graph is partitioned and number of subgraphs running on Nv executio provider is " << number_of_subgraphs; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Graph is partitioned and number of subgraphs running on Nv executio provider is " << number_of_subgraphs; } // The context map is only used during EP compile time, release it to save memory space. 
@@ -2154,20 +2190,20 @@ common::Status NvExecutionProvider::RefitEngine(std::string onnx_model_filename, auto parser_refitter = std::unique_ptr( nvonnxparser::createParserRefitter(*refitter, trt_logger)); if (refit_from_file) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Refitting from file on disk: " << onnx_model_path.string(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Refitting from file on disk: " << onnx_model_path.string(); if (!parser_refitter->refitFromFile(onnx_model_path.string().c_str())) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Nv EP's IParserRefitter could not refit deserialized weight-stripped engine with weights contained in: " + onnx_model_path.string()); } } else { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Refitting from byte array"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Refitting from byte array"; if (!parser_refitter->refitFromBytes(onnx_model_bytestream, onnx_model_bytestream_size)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Nv EP's IParserRefitter could not refit deserialized weight-stripped engine with weights contained in the provided bytestraem"); } } if (refitter->refitCudaEngine()) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Successfully refitted the weight-stripped engine."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Successfully refitted the weight-stripped engine."; } else { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Nv EP's IRefitter could not refit deserialized weight-stripped engine with weights contained in: " + onnx_model_path.string()); @@ -2179,7 +2215,7 @@ common::Status NvExecutionProvider::RefitEngine(std::string onnx_model_filename, nvinfer1::IHostMemory* serialized_engine = trt_engine->serialize(); std::ofstream engine_file(refitted_engine_cache, std::ios::binary | std::ios::out); engine_file.write(reinterpret_cast(serialized_engine->data()), serialized_engine->size()); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Serialize the refitted engine to " << refitted_engine_cache; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] 
Serialize the refitted engine to " << refitted_engine_cache; } return Status::OK(); } @@ -2342,7 +2378,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr has_dynamic_shape |= tensor_is_dynamic(input); } if (has_dynamic_shape) { - LOGS_DEFAULT(WARNING) << "[Nv EP] No explicit optimization profile was specified. " + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] No explicit optimization profile was specified. " "We will assume a single profile with fully dynamic range. " "This feature is experimental and may change in the future." "If you plan to use this model as fixed shape we recommend using a free dimension override: " @@ -2365,7 +2401,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr if (has_explicit_profile && tensor_has_profile) { apply_profile = ApplyProfileShapesFromProviderOptions(trt_profiles, input, profile_min_shapes_, profile_max_shapes_, profile_opt_shapes_, input_explicit_shape_ranges); } else { - LOGS_DEFAULT(INFO) << "[Nv EP] Creating implicit profile for tensor " << input_name; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Creating implicit profile for tensor " << input_name; profile_min_shapes_[input_name] = std::vector>{{}}; profile_min_shapes_[input_name][0].resize(dims.nbDims); profile_opt_shapes_[input_name] = std::vector>{{}}; @@ -2422,20 +2458,20 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr // enable sparse weights if (sparsity_enable_) { trt_config->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Sparse weights are allowed"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Sparse weights are allowed"; } // limit auxiliary streams if (auxiliary_streams_ >= 0) { trt_config->setMaxAuxStreams(auxiliary_streams_); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Auxiliary streams are se to " << auxiliary_streams_; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Auxiliary streams are se to " << auxiliary_streams_; } if 
(weight_stripped_engine_enable_) { trt_config->setFlag(nvinfer1::BuilderFlag::kSTRIP_PLAN); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] STRIP_PLAN is enabled"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] STRIP_PLAN is enabled"; trt_config->setFlag(nvinfer1::BuilderFlag::kREFIT_IDENTICAL); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] REFIT_IDENTICAL is enabled"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] REFIT_IDENTICAL is enabled"; } // Build TRT engine (if needed) and load TRT engine if: @@ -2518,7 +2554,7 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr } if (weight_stripped_engine_refit_) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Refit engine from main ONNX file after engine build"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Refit engine from main ONNX file after engine build"; char* onnx = string_buf.data(); size_t onnx_size = string_buf.size(); auto status = RefitEngine(model_path_, diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc index 5559e2e791d40..0806ae3638036 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc @@ -58,9 +58,9 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& while (std::getline(extra_plugin_libs, lib, ';')) { auto status = LoadDynamicLibrary(ToPathString(lib)); if (status == Status::OK()) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Successfully load " << lib; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Successfully load " << lib; } else { - LOGS_DEFAULT(WARNING) << "[Nv EP]" << status.ToString(); + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP]" << status.ToString(); } } is_loaded = true; @@ -68,7 +68,7 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& try { // Get all registered TRT plugins from registry - LOGS_DEFAULT(VERBOSE) << "[Nv EP] 
Getting all registered TRT plugins from TRT plugin registry ..."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Getting all registered TRT plugins from TRT plugin registry ..."; TensorrtLogger trt_logger = GetTensorrtLogger(false); void* library_handle = nullptr; const auto& env = onnxruntime::GetDefaultEnv(); @@ -79,14 +79,14 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& bool (*dyn_initLibNvInferPlugins)(void* logger, char const* libNamespace); ORT_THROW_IF_ERROR(env.GetSymbolFromLibrary(library_handle, "initLibNvInferPlugins", (void**)&dyn_initLibNvInferPlugins)); dyn_initLibNvInferPlugins(&trt_logger, ""); - LOGS_DEFAULT(INFO) << "[Nv EP] Default plugins successfully loaded."; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Default plugins successfully loaded."; #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4996) // Ignore warning C4996: 'nvinfer1::*' was declared deprecated #endif } catch (const std::exception&) { - LOGS_DEFAULT(INFO) << "[Nv EP] Default plugin library is not on the path and is therefore ignored"; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Default plugin library is not on the path and is therefore ignored"; } try { int num_plugin_creator = 0; @@ -96,7 +96,7 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& for (int i = 0; i < num_plugin_creator; i++) { auto plugin_creator = plugin_creators[i]; std::string plugin_name(plugin_creator->getPluginName()); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion(); // plugin has different versions and we only register once if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) { @@ -116,7 +116,7 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& custom_op_domain->domain_ = "trt.plugins"; domain_list.push_back(custom_op_domain.get()); } catch 
(const std::exception&) { - LOGS_DEFAULT(WARNING) << "[Nv EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins"; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins"; } return Status::OK(); } diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_helper.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_helper.cc index 5373b6fd08afc..cd50f1e6b2d48 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_helper.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_helper.cc @@ -169,31 +169,31 @@ void NvExecutionProvider::SetGraphOuterScopeValuesAndInputs(Graph& graph_build, } std::string unique_graph_name = GetUniqueGraphName(*top_level_graph); if (subgraph_context_map_.find(unique_graph_name) == subgraph_context_map_.end()) { - LOGS_DEFAULT(ERROR) << "[Nv EP] Can't find top-level graph context. \ + LOGS_DEFAULT(ERROR) << "[NvTensorRTRTX EP] Can't find top-level graph context. 
\ Please check BuildSubGraphContext() has built the graph context correctly."; return; } SubGraphContext* context = subgraph_context_map_.at(unique_graph_name).get(); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Subgraph name is " << graph_build.Name(); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Its parent node is " << graph.ParentNode()->Name(); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Its parent node's implicit inputs:"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Subgraph name is " << graph_build.Name(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Its parent node is " << graph.ParentNode()->Name(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Its parent node's implicit inputs:"; // Iterate all the implicit inputs to set outer scope value for the newly built subgraph for (const auto& input : graph.ParentNode()->ImplicitInputDefs()) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] \t" << input->Name(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] \t" << input->Name(); // The node arg in parent node's implicit inputs could be used for parent node's other subgraph, for example // "If" op has two subgraphs. So we need to make sure that the node arg is used in current subgraph only. 
// (GetNodeArg searches for specific node arg in all node args in the graph) if (graph_build.GetNodeArg(input->Name())) { graph_build.AddOuterScopeNodeArg(input->Name()); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] \t" << input->Name() << " is used in this subgraph"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] \t" << input->Name() << " is used in this subgraph"; if (context && (context->manually_added_graph_inputs.find(input->Name()) != context->manually_added_graph_inputs.end())) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] \t" << input->Name() << " is already been added as an explicit input to graph"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] \t" << input->Name() << " is already been added as an explicit input to graph"; continue; } @@ -213,7 +213,7 @@ void NvExecutionProvider::SetGraphOuterScopeValuesAndInputs(Graph& graph_build, type_proto->copy_from(input->TypeAsProto()); auto& n_input = top_level_graph->GetOrCreateNodeArg(name, type_proto.get()); context->manually_added_graph_inputs[n_input.Name()] = &n_input; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] \t" << n_input.Name() << " is added as an explicit input into the newly built graph"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] \t" << n_input.Name() << " is added as an explicit input into the newly built graph"; } } } diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc index 05e5f7659efac..f5ba66746c3c4 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc @@ -4,13 +4,15 @@ #include "core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h" #include "core/providers/nv_tensorrt_rtx/nv_provider_options.h" +#include "core/session/onnxruntime_session_options_config_keys.h" #include "core/common/make_string.h" #include "core/common/parse_string.h" #include "core/framework/provider_options_utils.h" 
#include "core/providers/cuda/cuda_common.h" namespace onnxruntime { -NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) { +NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options, + const ConfigOptions& session_options) { NvExecutionProviderInfo info{}; void* user_compute_stream = nullptr; void* onnx_bytestream = nullptr; @@ -58,6 +60,25 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi info.user_compute_stream = user_compute_stream; info.has_user_compute_stream = (user_compute_stream != nullptr); info.onnx_bytestream = onnx_bytestream; + + // EP context settings + const auto embed_enable = session_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0"); + if (embed_enable == "0") { + info.dump_ep_context_model = false; + } else if (embed_enable == "1") { + info.dump_ep_context_model = true; + } else { + ORT_THROW("Invalid ", kOrtSessionOptionEpContextEnable, " must 0 or 1"); + } + info.ep_context_file_path = session_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); + + const auto embed_mode = std::stoi(session_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1")); + if (0 <= embed_mode && embed_mode < 2) { /* both bounds must hold: only 0 and 1 are valid embed modes */ + info.ep_context_embed_mode = embed_mode; + } else { + ORT_THROW("Invalid ", kOrtSessionOptionEpContextEmbedMode, " must 0 or 1"); + } + return info; } diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h index c3c4dba1ed982..626039e5ef7c8 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h @@ -8,8 +8,9 @@ #include "core/framework/ortdevice.h" #include "core/framework/provider_options.h" #include "core/framework/framework_provider_common.h" -#include
"core/session/onnxruntime_c_api.h" #include "core/framework/library_handles.h" +#include "core/session/onnxruntime_c_api.h" +#include "core/providers/shared_library/provider_api.h" #define TRT_DEFAULT_OPTIMIZER_LEVEL 3 @@ -19,18 +20,10 @@ struct NvExecutionProviderInfo { int device_id{0}; bool has_user_compute_stream{false}; void* user_compute_stream{nullptr}; - bool has_trt_options{false}; int max_partition_iterations{1000}; int min_subgraph_size{1}; size_t max_workspace_size{0}; - bool fp16_enable{false}; - bool int8_enable{false}; - std::string int8_calibration_table_name{""}; - bool int8_use_native_calibration_table{false}; - bool dla_enable{false}; - int dla_core{0}; bool dump_subgraphs{false}; - bool engine_cache_enable{false}; std::string engine_cache_path{""}; bool weight_stripped_engine_enable{false}; std::string onnx_model_folder_path{""}; @@ -40,16 +33,10 @@ struct NvExecutionProviderInfo { std::string engine_decryption_lib_path{""}; bool force_sequential_engine_build{false}; bool context_memory_sharing_enable{false}; - bool layer_norm_fp32_fallback{false}; - bool timing_cache_enable{false}; std::string timing_cache_path{""}; - bool force_timing_cache{false}; bool detailed_build_log{false}; - bool build_heuristics_enable{false}; bool sparsity_enable{false}; - int builder_optimization_level{3}; int auxiliary_streams{-1}; - std::string tactic_sources{""}; std::string extra_plugin_lib_paths{""}; std::string profile_min_shapes{""}; std::string profile_max_shapes{""}; @@ -59,10 +46,10 @@ struct NvExecutionProviderInfo { std::string ep_context_file_path{""}; int ep_context_embed_mode{0}; std::string engine_cache_prefix{""}; - bool engine_hw_compatible{false}; std::string op_types_to_exclude{""}; - static NvExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); + static NvExecutionProviderInfo FromProviderOptions(const ProviderOptions& options, + const ConfigOptions& session_options); static ProviderOptions ToProviderOptions(const 
NvExecutionProviderInfo& info); std::vector custom_op_domain_list; }; diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_utils.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_utils.h index 169127f222949..046010deedf62 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_utils.h +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_utils.h @@ -153,22 +153,22 @@ std::unordered_map>>>& shape_ranges) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] In SerializeProfileV2()"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] In SerializeProfileV2()"; // Serialize profile flexbuffers::Builder builder; auto tensor_map_start = builder.StartMap(); for (auto tensor_it = shape_ranges.begin(); tensor_it != shape_ranges.end(); tensor_it++) { // iterate tensors - LOGS_DEFAULT(VERBOSE) << "[Nv EP] input tensor is '" << tensor_it->first.c_str() << "'"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] input tensor is '" << tensor_it->first.c_str() << "'"; builder.TypedVector(tensor_it->first.c_str(), [&] { for (auto dim_it = tensor_it->second.begin(); dim_it != tensor_it->second.end(); dim_it++) { size_t num_profiles = dim_it->second.size(); for (size_t i = 0; i < num_profiles; i++) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] profile #" << i << ", dim is " << dim_it->first; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] profile #" << i << ", dim is " << dim_it->first; builder.Int(dim_it->first); builder.Int(dim_it->second[i][0]); builder.Int(dim_it->second[i][1]); builder.Int(dim_it->second[i][2]); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] " << dim_it->first << ", " << dim_it->second[i][0] << ", " << dim_it->second[i][1] << ", " << dim_it->second[i][2]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] " << dim_it->first << ", " << dim_it->second[i][0] << ", " << dim_it->second[i][1] << ", " << dim_it->second[i][2]; } } }); @@ -233,7 +233,7 @@ void SerializeProfileV2(const std::string& file_name, std::unordered_map>>> 
DeserializeProfileV2(std::ifstream& infile) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] In DeserializeProfileV2()"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] In DeserializeProfileV2()"; // Load flexbuffer infile.seekg(0, std::ios::end); size_t length = infile.tellg(); @@ -248,7 +248,7 @@ std::unordered_map>> inner_map; std::vector> profile_vector; @@ -265,7 +265,7 @@ std::unordered_map>>& profile_opt_shapes) { std::ifstream profile_file(file_name, std::ios::binary | std::ios::in); if (!profile_file) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] " << file_name << " doesn't exist."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] " << file_name << " doesn't exist."; return true; } @@ -313,7 +313,7 @@ bool CompareProfiles(const std::string& file_name, // Check number of dynamic shape inputs if (profile_min_shapes.size() != shape_ranges.size()) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Numbers of dynamic shape inputs are not the same."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Numbers of dynamic shape inputs are not the same."; return true; } @@ -321,7 +321,7 @@ bool CompareProfiles(const std::string& file_name, for (auto tensor_it = shape_ranges.begin(); tensor_it != shape_ranges.end(); tensor_it++) { // iterate tensors auto tensor_name = tensor_it->first; if (profile_min_shapes.find(tensor_name) == profile_min_shapes.end()) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Tensor name '" << tensor_name << "' doesn't exist in trt_profile_min_shapes."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Tensor name '" << tensor_name << "' doesn't exist in trt_profile_min_shapes."; return true; } @@ -330,35 +330,35 @@ bool CompareProfiles(const std::string& file_name, auto num_profiles = GetNumProfiles(profile_min_shapes); if (dim_it->second.size() != static_cast(num_profiles)) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Numbers of profiles are not the same."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Numbers of profiles are not the same."; return true; } for (size_t i = 0; i < 
dim_it->second.size(); i++) { // iterate (multiple) profile(s) auto shape_values = dim_it->second[i]; if (dim > (profile_min_shapes[tensor_name][i].size() - 1)) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] dimension " << dim << " of '" << tensor_name << "' in " << file_name << " exceeds the total dimension of trt_profile_min_shapes."; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] dimension " << dim << " of '" << tensor_name << "' in " << file_name << " exceeds the total dimension of trt_profile_min_shapes."; return true; } - LOGS_DEFAULT(VERBOSE) << "[Nv EP] min shape value of dimension " << dim << " of '" << tensor_name << "' is " << profile_min_shapes[tensor_name][i][dim]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] min shape value of dimension " << dim << " of '" << tensor_name << "' is " << shape_values[0] << " in " << file_name; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] min shape value of dimension " << dim << " of '" << tensor_name << "' is " << profile_min_shapes[tensor_name][i][dim]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] min shape value of dimension " << dim << " of '" << tensor_name << "' is " << shape_values[0] << " in " << file_name; if (profile_min_shapes[tensor_name][i][dim] != shape_values[0]) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] min shape values of dimension " << dim << " of '" << tensor_name << "' are not the same"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] min shape values of dimension " << dim << " of '" << tensor_name << "' are not the same"; return true; } - LOGS_DEFAULT(VERBOSE) << "[Nv EP] max shape value of dimension " << dim << " of '" << tensor_name << "' is " << profile_max_shapes[tensor_name][i][dim]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] max shape value of dimension " << dim << " of '" << tensor_name << "' is " << shape_values[1] << " in " << file_name; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] max shape value of dimension " << dim << " of '" << tensor_name << "' is " << profile_max_shapes[tensor_name][i][dim]; + 
LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] max shape value of dimension " << dim << " of '" << tensor_name << "' is " << shape_values[1] << " in " << file_name; if (profile_max_shapes[tensor_name][i][dim] != shape_values[1]) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] max shape values of dimension " << dim << " of '" << tensor_name << "' are not the same"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] max shape values of dimension " << dim << " of '" << tensor_name << "' are not the same"; return true; } - LOGS_DEFAULT(VERBOSE) << "[Nv EP] opt shape value of dimension " << dim << " of '" << tensor_name << "' is " << profile_opt_shapes[tensor_name][i][dim]; - LOGS_DEFAULT(VERBOSE) << "[Nv EP] opt shape value of dimension " << dim << " of '" << tensor_name << "' is " << shape_values[2] << " in " << file_name; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] opt shape value of dimension " << dim << " of '" << tensor_name << "' is " << profile_opt_shapes[tensor_name][i][dim]; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] opt shape value of dimension " << dim << " of '" << tensor_name << "' is " << shape_values[2] << " in " << file_name; if (profile_opt_shapes[tensor_name][i][dim] != shape_values[2]) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] opt shape values of dimension " << dim << " of '" << tensor_name << "' are not the same"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] opt shape values of dimension " << dim << " of '" << tensor_name << "' are not the same"; return true; } } @@ -461,7 +461,7 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version if (main_graph.ModelPath().has_filename()) { std::string model_name = PathToUTF8String(main_graph.ModelPath().filename()); - LOGS_DEFAULT(INFO) << "[Nv EP] Model name is " << model_name; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Model name is " << model_name; // Ensure enough characters are hashed in case model names are too short const size_t model_name_length = model_name.size(); constexpr size_t 
hash_string_length = 500; @@ -471,7 +471,7 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version } hash_str(repeat_model_name); } else { - LOGS_DEFAULT(INFO) << "[Nv EP] Model path is empty"; + LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Model path is empty"; } // fingerprint current graph by hashing graph inputs @@ -567,7 +567,7 @@ bool MakeInputNameShapePair(std::string pair_string, std::pair& domain_list, const std::string extra_plugin_lib_paths) override { common::Status status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths); if (!status.IsOK()) { - return CreateStatus(ORT_FAIL, "[Nv EP] Can't create custom ops for TRT plugins."); + return CreateStatus(ORT_FAIL, "[NvTensorRTRTX EP] Can't create custom ops for TRT plugins."); } return nullptr; } @@ -79,7 +79,7 @@ std::unique_ptr NvProviderFactory::CreateProvider(const OrtS provider_options[key.substr(key_prefix.size())] = value; } } - NvExecutionProviderInfo info = onnxruntime::NvExecutionProviderInfo::FromProviderOptions(provider_options); + NvExecutionProviderInfo info = onnxruntime::NvExecutionProviderInfo::FromProviderOptions(provider_options, config_options); auto ep = std::make_unique(info); ep->SetLogger(reinterpret_cast(&session_logger)); @@ -91,14 +91,26 @@ struct Nv_Provider : Provider { std::shared_ptr CreateExecutionProviderFactory(int device_id) override { NvExecutionProviderInfo info; info.device_id = device_id; - info.has_trt_options = false; return std::make_shared(info); } - std::shared_ptr CreateExecutionProviderFactory(const void* options) { - const ProviderOptions* provider_options = reinterpret_cast(options); - NvExecutionProviderInfo info = onnxruntime::NvExecutionProviderInfo::FromProviderOptions(*provider_options); + std::shared_ptr CreateExecutionProviderFactory(const void* param) { + if (param == nullptr) { + LOGS_DEFAULT(ERROR) << "[NvTensorRTRTX EP] Passed NULL options to CreateExecutionProviderFactory()"; + return nullptr; + } + + 
std::array pointers_array = *reinterpret_cast*>(param); const ProviderOptions* provider_options = reinterpret_cast(pointers_array[0]); const ConfigOptions* config_options = reinterpret_cast(pointers_array[1]); + + if (provider_options == nullptr || config_options == nullptr) { /* both pointers are dereferenced below; the caller may pass a null ConfigOptions */ + LOGS_DEFAULT(ERROR) << "[NvTensorRTRTX EP] Passed NULL ProviderOptions or ConfigOptions to CreateExecutionProviderFactory()"; + return nullptr; + } + + NvExecutionProviderInfo info = onnxruntime::NvExecutionProviderInfo::FromProviderOptions(*provider_options, *config_options); return std::make_shared(info); } diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory_creator.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory_creator.h index 7eeb6cce4fa03..616f5f1fbe754 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory_creator.h +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory_creator.h @@ -9,9 +9,12 @@ #include "core/providers/providers.h" namespace onnxruntime { +struct SessionOptions; + // defined in provider_bridge_ort.cc struct NvProviderFactoryCreator { static std::shared_ptr Create(int device_id); - static std::shared_ptr Create(const ProviderOptions& provider_options); + static std::shared_ptr Create(const ProviderOptions& provider_options_map, + const SessionOptions* session_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/onnx_ctx_model_helper.cc index 4f84e853f999c..25decd8f2ce8f 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/onnx_ctx_model_helper.cc @@ -213,7 +213,7 @@ void DumpCtxModel(ONNX_NAMESPACE::ModelProto* model_proto, const std::string& ctx_model_path) { std::fstream dump(ctx_model_path, std::ios::out | std::ios::trunc | std::ios::binary); model_proto->SerializeToOstream(dump); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Dumped " +
ctx_model_path; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Dumped " + ctx_model_path; } bool IsAbsolutePath(const std::string& path_string) { @@ -285,7 +285,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph const std::string& context_binary = attrs.at(EP_CACHE_CONTEXT).s(); *(trt_engine_) = std::unique_ptr(trt_runtime_->deserializeCudaEngine(const_cast(context_binary.c_str()), static_cast(context_binary.length()))); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] Read engine as binary data from \"ep_cache_context\" attribute of ep context node and deserialized it"; + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] Read engine as binary data from \"ep_cache_context\" attribute of ep context node and deserialized it"; if (!(*trt_engine_)) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Nv EP could not deserialize engine from binary data"); @@ -324,7 +324,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph // The engine cache and context model (current model) should be in the same directory std::filesystem::path ctx_model_dir(GetPathOrParentPathOfCtxModel(ep_context_model_path_)); auto engine_cache_path = ctx_model_dir.append(cache_path); - LOGS_DEFAULT(VERBOSE) << "[Nv EP] GetEpContextFromGraph engine_cache_path: " + engine_cache_path.string(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] GetEpContextFromGraph engine_cache_path: " + engine_cache_path.string(); // If it's a weight-stripped engine cache, it needs to be refitted even though the refit flag is not enabled if (!weight_stripped_engine_refit_) { @@ -335,7 +335,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph if (weight_stripped_engine_refit_) { const std::filesystem::path refitted_engine_cache_path = GetWeightRefittedEnginePath(engine_cache_path.string()); if (std::filesystem::exists(refitted_engine_cache_path)) { - LOGS_DEFAULT(VERBOSE) << "[Nv EP] " + refitted_engine_cache_path.string() + " exists."; + 
LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] " + refitted_engine_cache_path.string() + " exists."; engine_cache_path = refitted_engine_cache_path.string(); weight_stripped_engine_refit_ = false; } @@ -358,7 +358,7 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const GraphViewer& graph return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Nv EP could not deserialize engine from cache: " + engine_cache_path.string()); } - LOGS_DEFAULT(VERBOSE) << "[Nv EP] DeSerialized " + engine_cache_path.string(); + LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] DeSerialized " + engine_cache_path.string(); if (weight_stripped_engine_refit_) { const std::string onnx_model_filename = attrs.at(ONNX_MODEL_FILENAME).s(); @@ -394,14 +394,14 @@ bool TensorRTCacheModelHandler::ValidateEPCtxNode(const GraphViewer& graph_viewe std::string model_compute_capability = attrs.at(COMPUTE_CAPABILITY).s(); // Verify if engine was compiled with ampere+ hardware compatibility enabled if (model_compute_capability == "80+") { - LOGS_DEFAULT(WARNING) << "[Nv EP] Engine is compatible to all Ampere+ GPU (except Jetson)"; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Engine is compatible to all Ampere+ GPU (except Jetson)"; if (std::stoi(compute_capability_) < 80) { - LOGS_DEFAULT(WARNING) << "[Nv EP] However, this GPU doesn't match. The compute capability of the GPU: " << compute_capability_; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] However, this GPU doesn't match. 
The compute capability of the GPU: " << compute_capability_; } } else if (model_compute_capability != compute_capability_) { - LOGS_DEFAULT(WARNING) << "[Nv EP] Engine was compiled for a different compatibility level and might not work or perform suboptimal"; - LOGS_DEFAULT(WARNING) << "[Nv EP] The compute capability of the engine: " << model_compute_capability; - LOGS_DEFAULT(WARNING) << "[Nv EP] The compute capability of the GPU: " << compute_capability_; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] Engine was compiled for a different compatibility level and might not work or perform suboptimal"; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The compute capability of the engine: " << model_compute_capability; + LOGS_DEFAULT(WARNING) << "[NvTensorRTRTX EP] The compute capability of the GPU: " << compute_capability_; } } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 0ff51a12401bc..7fcaee48581f6 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -10,6 +10,7 @@ #include "core/common/inlined_containers.h" #include "core/common/path_string.h" #include "core/common/string_helper.h" + #include "core/framework/allocator_utils.h" #include "core/framework/compute_capability.h" #include "core/framework/config_options.h" @@ -2049,8 +2050,15 @@ std::shared_ptr NvProviderFactoryCreator::Create(int } std::shared_ptr NvProviderFactoryCreator::Create( - const ProviderOptions& provider_options) try { - return s_library_nv.Get().CreateExecutionProviderFactory(&provider_options); + const ProviderOptions& provider_options, const SessionOptions* session_options) try { + const ConfigOptions* config_options = nullptr; + if (session_options != nullptr) { + config_options = &session_options->config_options; + } + + std::array configs_array = {&provider_options, config_options}; + const void* arg = reinterpret_cast(&configs_array); + return 
s_library_nv.Get().CreateExecutionProviderFactory(arg); } catch (const std::exception& exception) { // Will get an exception when fail to load EP library. LOGS_DEFAULT(ERROR) << exception.what(); @@ -2647,8 +2655,7 @@ ORT_API_STATUS_IMPL(OrtApis::UpdateTensorRTProviderOptions, defined(USE_CUDA) || defined(USE_CUDA_PROVIDER_INTERFACE) || \ defined(USE_CANN) || \ defined(USE_DNNL) || \ - defined(USE_ROCM) || \ - defined(USE_NV) || defined(USE_NV_PROVIDER_INTERFACE) + defined(USE_ROCM) static std::string BuildOptionsString(const onnxruntime::ProviderOptions::iterator& begin, const onnxruntime::ProviderOptions::iterator& end) { std::ostringstream options; diff --git a/onnxruntime/core/session/provider_registration.cc b/onnxruntime/core/session/provider_registration.cc index c33cc170f7b0f..2108626e36853 100644 --- a/onnxruntime/core/session/provider_registration.cc +++ b/onnxruntime/core/session/provider_registration.cc @@ -301,7 +301,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider, } case EpID::NvTensorRtRtx: { #if defined(USE_NV) || defined(USE_NV_PROVIDER_INTERFACE) - auto factory = onnxruntime::NvProviderFactoryCreator::Create(provider_options); + auto factory = onnxruntime::NvProviderFactoryCreator::Create(provider_options, &(options->value)); if (factory) { options->provider_factories.push_back(factory); } else { diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 7923cdf0f85b0..0f15c5fbbdba0 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -890,7 +890,8 @@ std::unique_ptr CreateExecutionProviderInstance( auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { ProviderOptions info = it->second; - if (std::shared_ptr nv_tensorrt_rtx_provider_factory = onnxruntime::NvProviderFactoryCreator::Create(info)) { + if (std::shared_ptr nv_tensorrt_rtx_provider_factory = 
onnxruntime::NvProviderFactoryCreator::Create( + info, &session_options)) { return nv_tensorrt_rtx_provider_factory->CreateProvider(); } } else { diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc index 9515c8eb78ed6..f3a963ce47eda 100644 --- a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc +++ b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc @@ -9,6 +9,8 @@ #include "test/common/trt_op_test_utils.h" #include +#include +#include #include #include #include @@ -22,9 +24,19 @@ namespace onnxruntime { namespace test { -std::string WideToUTF8(const std::wstring& wstr) { +std::string PathToUTF8(const PathString& path) { +#ifdef WIN32 std::wstring_convert> converter; - return converter.to_bytes(wstr); + return converter.to_bytes(path); +#else + return path.c_str(); +#endif +} + +void clearFileIfExists(PathString path) { + if (std::filesystem::exists(path)) { + std::filesystem::remove(path); + } } template @@ -74,10 +86,10 @@ void VerifyOutputs(const std::vector& fetches, const std::vector dims, - bool add_fast_gelu = false) { +static void CreateBaseModel(const PathString& model_name, + std::string graph_name, + std::vector dims, + bool add_fast_gelu = false) { onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); auto& graph = model.MainGraph(); std::vector inputs; @@ -143,7 +155,7 @@ void CreateBaseModel(const PathString& model_name, status = onnxruntime::Model::Save(model, model_name); } -Ort::IoBinding generate_io_binding(Ort::Session& session, std::map> shape_overwrites = {}) { +static Ort::IoBinding generate_io_binding(Ort::Session& session, std::map> shape_overwrites = {}) { Ort::IoBinding binding(session); auto allocator = Ort::AllocatorWithDefaultOptions(); for (int input_idx = 0; input_idx < int(session.GetInputCount()); ++input_idx) { @@ -178,6 +190,8 @@ Ort::IoBinding generate_io_binding(Ort::Session& session, std::map 
dims = {1, 3, 2}; @@ -192,9 +206,9 @@ TEST(NvExecutionProviderTest, ContextEmbedAndReload) { auto start = std::chrono::high_resolution_clock::now(); Ort::SessionOptions so; Ort::RunOptions run_options; - so.AddConfigEntry("ep.context_enable", "1"); - so.AddConfigEntry("ep.context_file_path", WideToUTF8(model_name_ctx).c_str()); - so.AppendExecutionProvider("NvTensorRtRtx", {}); + so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + so.AddConfigEntry(kOrtSessionOptionEpContextFilePath, model_name_ctx_str.c_str()); + so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {}); Ort::Session session_object(env, model_name.c_str(), so); auto stop = std::chrono::high_resolution_clock::now(); std::cout << "Session creation AOT: " << std::chrono::duration_cast((stop - start)).count() << " ms" << std::endl; @@ -208,9 +222,9 @@ TEST(NvExecutionProviderTest, ContextEmbedAndReload) { auto start = std::chrono::high_resolution_clock::now(); Ort::SessionOptions so; Ort::RunOptions run_options; - so.AddConfigEntry("ep.context_enable", "1"); - so.AppendExecutionProvider("NvTensorRtRtx", {}); - Ort::Session session_object(env, model_name.c_str(), so); + so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {}); + Ort::Session session_object(env, model_name_ctx.c_str(), so); auto stop = std::chrono::high_resolution_clock::now(); std::cout << "Session creation JIT: " << std::chrono::duration_cast((stop - start)).count() << " ms" << std::endl; @@ -222,6 +236,8 @@ TEST(NvExecutionProviderTest, ContextEmbedAndReload) { TEST(NvExecutionProviderTest, ContextEmbedAndReloadDynamic) { PathString model_name = ORT_TSTR("nv_execution_provider_dyn_test.onnx"); PathString model_name_ctx = ORT_TSTR("nv_execution_provider_dyn_test_ctx.onnx"); + auto model_name_ctx_str = PathToUTF8(model_name_ctx); + clearFileIfExists(model_name_ctx); std::string graph_name = "test"; std::vector dims = {1, -1, -1}; @@ -236,9 +252,9 @@ 
TEST(NvExecutionProviderTest, ContextEmbedAndReloadDynamic) { auto start = std::chrono::high_resolution_clock::now(); Ort::SessionOptions so; Ort::RunOptions run_options; - so.AddConfigEntry("ep.context_enable", "1"); - so.AddConfigEntry("ep.context_file_path", WideToUTF8(model_name_ctx).c_str()); - so.AppendExecutionProvider("NvTensorRtRtx", {}); + so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + so.AddConfigEntry(kOrtSessionOptionEpContextFilePath, model_name_ctx_str.c_str()); + so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {}); Ort::Session session_object(env, model_name.c_str(), so); auto stop = std::chrono::high_resolution_clock::now(); std::cout << "Session creation AOT: " << std::chrono::duration_cast((stop - start)).count() << " ms" << std::endl; @@ -252,9 +268,9 @@ TEST(NvExecutionProviderTest, ContextEmbedAndReloadDynamic) { auto start = std::chrono::high_resolution_clock::now(); Ort::SessionOptions so; Ort::RunOptions run_options; - so.AddConfigEntry("ep.context_enable", "1"); - so.AppendExecutionProvider("NvTensorRtRtx", {}); - Ort::Session session_object(env, model_name.c_str(), so); + so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {}); + Ort::Session session_object(env, model_name_ctx.c_str(), so); auto stop = std::chrono::high_resolution_clock::now(); std::cout << "Session creation JIT: " << std::chrono::duration_cast((stop - start)).count() << " ms" << std::endl; @@ -269,6 +285,8 @@ TEST(NvExecutionProviderTest, ContextEmbedAndReloadDynamic) { TEST(NvExecutionProviderTest, ContextEmbedAndReloadDataDynamic) { PathString model_name = ORT_TSTR("nv_execution_provider_data_dyn_test.onnx"); PathString model_name_ctx = ORT_TSTR("nv_execution_provider_data_dyn_test_ctx.onnx"); + auto model_name_ctx_str = PathToUTF8(model_name_ctx); + clearFileIfExists(model_name_ctx); std::string graph_name = "test"; std::vector dims = {1, -1, -1}; @@ -283,9 +301,9 @@ 
TEST(NvExecutionProviderTest, ContextEmbedAndReloadDataDynamic) { auto start = std::chrono::high_resolution_clock::now(); Ort::SessionOptions so; Ort::RunOptions run_options; - so.AddConfigEntry("ep.context_enable", "1"); - so.AddConfigEntry("ep.context_file_path", WideToUTF8(model_name_ctx).c_str()); - so.AppendExecutionProvider("NvTensorRtRtx", {}); + so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + so.AddConfigEntry(kOrtSessionOptionEpContextFilePath, model_name_ctx_str.c_str()); + so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {}); Ort::Session session_object(env, model_name.c_str(), so); auto stop = std::chrono::high_resolution_clock::now(); std::cout << "Session creation AOT: " << std::chrono::duration_cast((stop - start)).count() << " ms" << std::endl; @@ -299,9 +317,9 @@ TEST(NvExecutionProviderTest, ContextEmbedAndReloadDataDynamic) { auto start = std::chrono::high_resolution_clock::now(); Ort::SessionOptions so; Ort::RunOptions run_options; - so.AddConfigEntry("ep.context_enable", "1"); - so.AppendExecutionProvider("NvTensorRtRtx", {}); - Ort::Session session_object(env, model_name.c_str(), so); + so.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"); + so.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {}); + Ort::Session session_object(env, model_name_ctx.c_str(), so); auto stop = std::chrono::high_resolution_clock::now(); std::cout << "Session creation JIT: " << std::chrono::duration_cast((stop - start)).count() << " ms" << std::endl; diff --git a/tools/ci_build/github/azure-pipelines/nuget-windows-ai.yml b/tools/ci_build/github/azure-pipelines/nuget-windows-ai.yml index c6ab33164035c..753395151b620 100644 --- a/tools/ci_build/github/azure-pipelines/nuget-windows-ai.yml +++ b/tools/ci_build/github/azure-pipelines/nuget-windows-ai.yml @@ -260,12 +260,12 @@ extends: displayName: "Sign Nuget package" inputs: ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' - AppRegistrationClientId: 
'53d54d02-978d-4305-8572-583cf6711c4f' - AppRegistrationTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - AuthAKVName: 'buildkeyvault' - AuthCertName: '53d54d02-SSL-AutoRotate' - AuthSignCertName: '53d54d02-978d-4305-8572-583cf6711c4f' - + UseMSIAuthentication: true + AppRegistrationClientId: '62b7cfed-4d25-454f-880e-010dc21455ac' + AppRegistrationTenantId: '975f013f-7f24-47e8-a7d3-abc4752bf346' + EsrpClientId: "53d54d02-978d-4305-8572-583cf6711c4f" + AuthAKVName: 'ortbuildkeyvault' + AuthSignCertName: 'esrpcodesign' FolderPath: $(Build.ArtifactStagingDirectory) Pattern: '*.nupkg' SessionTimeout: 90 diff --git a/tools/ci_build/github/azure-pipelines/templates/esrp_nuget.yml b/tools/ci_build/github/azure-pipelines/templates/esrp_nuget.yml index 79cceb7a02511..ffec479474721 100644 --- a/tools/ci_build/github/azure-pipelines/templates/esrp_nuget.yml +++ b/tools/ci_build/github/azure-pipelines/templates/esrp_nuget.yml @@ -9,12 +9,12 @@ steps: displayName: 'ESRP CodeSigning' inputs: ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' - AppRegistrationClientId: '53d54d02-978d-4305-8572-583cf6711c4f' - AppRegistrationTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - AuthAKVName: 'buildkeyvault' - AuthCertName: '53d54d02-SSL-AutoRotate' - AuthSignCertName: '53d54d02-978d-4305-8572-583cf6711c4f' - + UseMSIAuthentication: true + AppRegistrationClientId: '62b7cfed-4d25-454f-880e-010dc21455ac' + AppRegistrationTenantId: '975f013f-7f24-47e8-a7d3-abc4752bf346' + EsrpClientId: "53d54d02-978d-4305-8572-583cf6711c4f" + AuthAKVName: 'ortbuildkeyvault' + AuthSignCertName: 'esrpcodesign' FolderPath: ${{ parameters.FolderPath }} Pattern: '*.nupkg' SessionTimeout: 90 diff --git a/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-linux.yml b/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-linux.yml index d14952e544e5e..df2aff0634819 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-linux.yml +++ 
b/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-linux.yml @@ -7,7 +7,7 @@ steps: displayName: 'Get GnuPG signing keys' inputs: #The value below is the name of an ADO service connection. - azureSubscription: 'OnnxrunTimeCodeSign_20240611' + azureSubscription: 'AIInfraBuildOnnxRuntimeOSS' KeyVaultName: 'ort-release' SecretsFilter: 'java-pgp-pwd,java-pgp-key' RunAsPreJob: false diff --git a/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-win.yml b/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-win.yml index 5681b3568bae1..ef845dc3bf243 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-win.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jar-maven-signing-win.yml @@ -6,7 +6,7 @@ steps: - task: AzureKeyVault@2 displayName: 'Get GnuPG signing keys' inputs: - azureSubscription: 'OnnxrunTimeCodeSign_20240611' + azureSubscription: 'AIInfraBuildOnnxRuntimeOSS' KeyVaultName: 'ort-release' SecretsFilter: 'java-pgp-pwd,java-pgp-key' RunAsPreJob: false diff --git a/tools/ci_build/github/azure-pipelines/templates/mac-esrp-dylib.yml b/tools/ci_build/github/azure-pipelines/templates/mac-esrp-dylib.yml index a9a22945d3701..5e6cd2240feba 100644 --- a/tools/ci_build/github/azure-pipelines/templates/mac-esrp-dylib.yml +++ b/tools/ci_build/github/azure-pipelines/templates/mac-esrp-dylib.yml @@ -12,12 +12,12 @@ steps: displayName: 'ESRP CodeSigning' inputs: ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' - AppRegistrationClientId: '53d54d02-978d-4305-8572-583cf6711c4f' - AppRegistrationTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - AuthAKVName: 'buildkeyvault' - AuthCertName: '53d54d02-SSL-AutoRotate' - AuthSignCertName: '53d54d02-978d-4305-8572-583cf6711c4f' - + UseMSIAuthentication: true + AppRegistrationClientId: '62b7cfed-4d25-454f-880e-010dc21455ac' + AppRegistrationTenantId: '975f013f-7f24-47e8-a7d3-abc4752bf346' + EsrpClientId: "53d54d02-978d-4305-8572-583cf6711c4f" + 
AuthAKVName: 'ortbuildkeyvault' + AuthSignCertName: 'esrpcodesign' FolderPath: ${{ parameters.FolderPath }} Pattern: ${{ parameters.Pattern }} SessionTimeout: 90 diff --git a/tools/ci_build/github/azure-pipelines/templates/win-esrp-dll.yml b/tools/ci_build/github/azure-pipelines/templates/win-esrp-dll.yml index 86acebc9f7a71..0476bc74349bf 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-esrp-dll.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-esrp-dll.yml @@ -21,11 +21,12 @@ steps: condition: and(succeeded(), eq('${{ parameters.DoEsrp }}', true)) inputs: ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' - AppRegistrationClientId: '53d54d02-978d-4305-8572-583cf6711c4f' - AppRegistrationTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - AuthAKVName: 'buildkeyvault' - AuthCertName: '53d54d02-SSL-AutoRotate' - AuthSignCertName: '53d54d02-978d-4305-8572-583cf6711c4f' + UseMSIAuthentication: true + AppRegistrationClientId: '62b7cfed-4d25-454f-880e-010dc21455ac' + AppRegistrationTenantId: '975f013f-7f24-47e8-a7d3-abc4752bf346' + EsrpClientId: "53d54d02-978d-4305-8572-583cf6711c4f" + AuthAKVName: 'ortbuildkeyvault' + AuthSignCertName: 'esrpcodesign' signConfigType: inlineSignParams inlineOperation: | [