
Commit 5c1b7cc

yf711, chilo-ms, liqunfu, github-actions[bot], and adrianlizarraga authored
[ORT 1.20.1 Release] Cherry pick 2nd round (#22845)
### Description

All three PRs are cherry-picked in this round:

1. Refactor SkipLayerNorm and handle beta properly (#22862)
2. [TensorRT EP] Exclude DDS ops from running on TRT (#22875)
3. [QNN EP] QNN SDK 2.28.2 (#22844)

### Motivation and Context

---------

Signed-off-by: Liqun Fu <[email protected]>
Signed-off-by: Liqun Fu <[email protected]>
Co-authored-by: Chi Lo <[email protected]>
Co-authored-by: liqun Fu <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Adrian Lizarraga <[email protected]>
1 parent c6156c1 commit 5c1b7cc

24 files changed (+177 -164 lines)

onnxruntime/contrib_ops/cpu/skip_layer_norm.cc

+78 -108
@@ -96,79 +96,6 @@ void ComputeJob(
   }
 }
 
-void ComputeJob(
-    const MLFloat16* input_data,
-    const MLFloat16* skip_data,
-    const float* prepacked_skip_fp32_data,
-    const float* gamma_float_ptr,
-    const float* beta_float_ptr,
-    const float* bias_float_ptr,
-    float* output_float_ptr,
-    ptrdiff_t task_idx,
-    int hidden_size,
-    int64_t skip_size,
-    float epsilon,
-    bool simplified,
-    MLFloat16* output_data,
-    MLFloat16* skip_input_bias_add_output_data,
-    AllocatorPtr alloc) {
-  auto offset = task_idx * hidden_size;
-  const MLFloat16* p_input = input_data + offset;
-  MLFloat16* p_output = output_data + offset;
-  MLFloat16* p_skip_input_bias_add_output = skip_input_bias_add_output_data == nullptr ? nullptr : skip_input_bias_add_output_data + offset;
-
-  float mean(0.0f);
-  float mean_square(0.0f);
-  const size_t num_elems = static_cast<size_t>(hidden_size);
-
-  IAllocatorUniquePtr<float> input_float_uptr = IAllocator::MakeUniquePtr<float>(alloc, num_elems);
-  MlasConvertHalfToFloatBuffer(p_input, input_float_uptr.get(), num_elems);
-
-  IAllocatorUniquePtr<float> skip_float_uptr = nullptr;
-  if (prepacked_skip_fp32_data == nullptr && skip_data) {
-    const MLFloat16* p_skip = skip_data + (offset % skip_size);
-    skip_float_uptr = IAllocator::MakeUniquePtr<float>(alloc, num_elems);
-    MlasConvertHalfToFloatBuffer(p_skip, skip_float_uptr.get(), num_elems);
-  }
-
-  const float* input_float_ptr = input_float_uptr.get();
-  const float* skip_float_ptr = prepacked_skip_fp32_data ? prepacked_skip_fp32_data : skip_float_uptr.get();
-  for (size_t h = 0; h < num_elems; h++) {
-    float val = input_float_ptr[h] + skip_float_ptr[h];
-
-    if (bias_float_ptr) {
-      val += bias_float_ptr[h];
-    }
-
-    output_float_ptr[h] = val;
-    mean += val;
-    mean_square += val * val;
-  }
-
-  if (nullptr != p_skip_input_bias_add_output) {
-    MlasConvertFloatToHalfBuffer(output_float_ptr, p_skip_input_bias_add_output, num_elems);
-  }
-
-  mean = mean / hidden_size;
-  if (simplified) {
-    mean_square = sqrt(mean_square / hidden_size + epsilon);
-  } else {
-    mean_square = sqrt(mean_square / hidden_size - mean * mean + epsilon);
-  }
-
-  for (size_t h = 0; h < num_elems; h++) {
-    if (simplified) {
-      output_float_ptr[h] = output_float_ptr[h] / mean_square * gamma_float_ptr[h];
-    } else if (nullptr == beta_float_ptr) {
-      output_float_ptr[h] = (output_float_ptr[h] - mean) / mean_square * gamma_float_ptr[h];
-    } else {
-      output_float_ptr[h] = (output_float_ptr[h] - mean) / mean_square * gamma_float_ptr[h] + beta_float_ptr[h];
-    }
-  }
-
-  MlasConvertFloatToHalfBuffer(output_float_ptr, p_output, num_elems);
-}
-
 void ConvertMLFloat16ToFloatIfNeeded(const Tensor& tensor, AllocatorPtr alloc, IAllocatorUniquePtr<float>& dest, bool& is_packed) {
   if (tensor.GetElementType() == utils::ToTensorProtoElementType<MLFloat16>()) {
     auto tensor_data_ptr = tensor.Data<MLFloat16>();
@@ -200,8 +127,8 @@ Status SkipLayerNorm<T, simplified>::Compute(OpKernelContext* p_ctx) const {
   const Tensor* input = p_ctx->Input<Tensor>(0);
   const Tensor* skip = prepacked_skip_fp32_data_ ? nullptr : p_ctx->Input<Tensor>(1);
   const Tensor* gamma = prepacked_gamma_fp32_data_ ? nullptr : p_ctx->Input<Tensor>(2);
-  const Tensor* beta = prepacked_beta_fp32_data_ ? nullptr : p_ctx->Input<Tensor>(3);
-  const Tensor* bias = prepacked_bias_fp32_data_ ? nullptr : p_ctx->Input<Tensor>(4);
+  const Tensor* beta = simplified ? nullptr : (prepacked_beta_fp32_data_ ? nullptr : p_ctx->Input<Tensor>(3));
+  const Tensor* bias = prepacked_bias_fp32_data_ ? nullptr : p_ctx->Input<Tensor>(simplified ? 3 : 4);
   Tensor* output = p_ctx->Output(0, input->Shape());
   // For inferencing, we support one more optional output which is the sum of the input and skip tensors
   Tensor* skip_input_bias_add_output = p_ctx->Output(3, input->Shape());
@@ -232,56 +159,93 @@ Status SkipLayerNorm<T, simplified>::Compute(OpKernelContext* p_ctx) const {
 
   // For inferencing, we support one more optional output which is the sum of the input and skip tensors
   T* skip_input_bias_add_output_data = skip_input_bias_add_output == nullptr ? nullptr : skip_input_bias_add_output->MutableData<T>();
-
   const int64_t skip_size = skip ? skip->Shape().Size() : prepacked_skip_fp32_size_;
 
-  AllocatorPtr alloc;
-  ORT_RETURN_IF_ERROR(p_ctx->GetTempSpaceAllocator(&alloc));
-
-  IAllocatorUniquePtr<float> output_fp32;
-  IAllocatorUniquePtr<float> gamma_fp32;
-  IAllocatorUniquePtr<float> beta_fp32;
-  IAllocatorUniquePtr<float> bias_fp32;
-
   if constexpr (std::is_same_v<T, MLFloat16>) {
+    const size_t total_data_size = static_cast<size_t>(input->Shape().Size());
+
+    AllocatorPtr alloc;
+    ORT_RETURN_IF_ERROR(p_ctx->GetTempSpaceAllocator(&alloc));
+
+    IAllocatorUniquePtr<float> input_fp32;
+    IAllocatorUniquePtr<float> output_fp32;
+    IAllocatorUniquePtr<float> skip_input_bias_add_output_fp32;
+    IAllocatorUniquePtr<float> skip_fp32;
+    IAllocatorUniquePtr<float> gamma_fp32;
+    IAllocatorUniquePtr<float> beta_fp32;
+    IAllocatorUniquePtr<float> bias_fp32;
+
+    const float* input_data_f = nullptr;
+    const float* skip_data_f = nullptr;
+    const float* gamma_data_f = nullptr;
+    const float* beta_data_f = nullptr;
+    const float* bias_data_f = nullptr;
+    float* output_data_f = nullptr;
+    float* skip_input_bias_add_output_data_f = nullptr;
+
     const size_t num_elems = static_cast<size_t>(hidden_size);
 
-    output_fp32 = IAllocator::MakeUniquePtr<float>(alloc, num_elems);
+    input_fp32 = IAllocator::MakeUniquePtr<float>(alloc, total_data_size);
+    MlasConvertHalfToFloatBuffer(input_data, input_fp32.get(), total_data_size);
+    input_data_f = input_fp32.get();
+
+    output_fp32 = IAllocator::MakeUniquePtr<float>(alloc, total_data_size);
+    output_data_f = output_fp32.get();
+
+    skip_input_bias_add_output_fp32 = IAllocator::MakeUniquePtr<float>(alloc, total_data_size);
+    skip_input_bias_add_output_data_f = skip_input_bias_add_output_fp32.get();
 
-    if (prepacked_gamma_fp32_data_ == nullptr && gamma_data) {
+    if (skip_data) {
+      skip_fp32 = IAllocator::MakeUniquePtr<float>(alloc, static_cast<size_t>(skip_size));
+      MlasConvertHalfToFloatBuffer(skip_data, skip_fp32.get(), static_cast<size_t>(skip_size));
+      skip_data_f = skip_fp32.get();
+    } else if (prepacked_skip_fp32_data_) {
+      skip_data_f = prepacked_skip_fp32_data_.get();
+    }
+
+    if (gamma_data) {
       gamma_fp32 = IAllocator::MakeUniquePtr<float>(alloc, num_elems);
       MlasConvertHalfToFloatBuffer(gamma_data, gamma_fp32.get(), num_elems);
+      gamma_data_f = gamma_fp32.get();
+    } else if (prepacked_gamma_fp32_data_) {
+      gamma_data_f = prepacked_gamma_fp32_data_.get();
     }
 
-    if (prepacked_beta_fp32_data_ == nullptr && beta_data) {
+    if (beta_data) {
       beta_fp32 = IAllocator::MakeUniquePtr<float>(alloc, num_elems);
       MlasConvertHalfToFloatBuffer(beta_data, beta_fp32.get(), num_elems);
+      beta_data_f = beta_fp32.get();
+    } else if (prepacked_beta_fp32_data_) {
+      beta_data_f = prepacked_beta_fp32_data_.get();
     }
 
-    if (prepacked_bias_fp32_data_ == nullptr && bias_data) {
+    if (bias_data) {
       bias_fp32 = IAllocator::MakeUniquePtr<float>(alloc, num_elems);
       MlasConvertHalfToFloatBuffer(bias_data, bias_fp32.get(), num_elems);
+      bias_data_f = bias_fp32.get();
+    } else if (prepacked_bias_fp32_data_) {
+      bias_data_f = prepacked_bias_fp32_data_.get();
     }
-  }
 
-  concurrency::ThreadPool::TryBatchParallelFor(
-      p_ctx->GetOperatorThreadPool(), static_cast<int32_t>(task_count),
-      [&](ptrdiff_t task_idx) {
-        if constexpr (std::is_same_v<T, MLFloat16>) {
-          ComputeJob(input_data, skip_data,
-                     prepacked_skip_fp32_data_.get(),
-                     prepacked_gamma_fp32_data_ ? prepacked_gamma_fp32_data_.get() : gamma_fp32.get(),
-                     prepacked_beta_fp32_data_ ? prepacked_beta_fp32_data_.get() : beta_fp32.get(),
-                     prepacked_bias_fp32_data_ ? prepacked_bias_fp32_data_.get() : bias_fp32.get(),
-                     output_fp32.get(),
-                     task_idx, hidden_size, skip_size, epsilon_, simplified, output_data,
-                     skip_input_bias_add_output_data, alloc);
-        } else {
+    concurrency::ThreadPool::TryBatchParallelFor(
+        p_ctx->GetOperatorThreadPool(), static_cast<int32_t>(task_count),
+        [&](ptrdiff_t task_idx) {
+          ComputeJob(input_data_f, skip_data_f, gamma_data_f, beta_data_f, bias_data_f, task_idx, hidden_size, skip_size,
+                     epsilon_, simplified, output_data_f, skip_input_bias_add_output_data_f);
+        },
+        0);
+    MlasConvertFloatToHalfBuffer(output_data_f, output_data, total_data_size);
+    if (skip_input_bias_add_output_data != nullptr)
+      MlasConvertFloatToHalfBuffer(skip_input_bias_add_output_data_f, skip_input_bias_add_output_data, total_data_size);
+  } else {
+    concurrency::ThreadPool::TryBatchParallelFor(
+        p_ctx->GetOperatorThreadPool(), static_cast<int32_t>(task_count),
+        [&](ptrdiff_t task_idx) {
           ComputeJob(input_data, skip_data, gamma_data, beta_data, bias_data, task_idx, hidden_size, skip_size,
                      epsilon_, simplified, output_data, skip_input_bias_add_output_data);
-        }
-      },
-      0);
+        },
+        0);
+  }
 
   return Status::OK();
 }
@@ -290,16 +254,22 @@ template <typename T, bool simplified>
 Status SkipLayerNorm<T, simplified>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
                                              bool& is_packed, PrePackedWeights* prepacked_weights) {
   ORT_UNUSED_PARAMETER(prepacked_weights);
-
   is_packed = false;
   if (input_idx == 1) { // skip
     prepacked_skip_fp32_size_ = tensor.Shape().Size();
     ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_skip_fp32_data_, is_packed);
   } else if (input_idx == 2) { // gamma
     ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_gamma_fp32_data_, is_packed);
-  } else if (input_idx == 3) { // beta
-    ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_beta_fp32_data_, is_packed);
+  } else if (input_idx == 3) {
+    if constexpr (simplified) {
+      // bias
+      ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_bias_fp32_data_, is_packed);
+    } else {
+      // beta
+      ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_beta_fp32_data_, is_packed);
+    }
   } else if (input_idx == 4) { // bias
+    ORT_ENFORCE(!simplified, "SkipSimplifiedLayerNormalization should only has 4 inputs (input, skip, gamma, and beta). Got 5.");
     ConvertMLFloat16ToFloatIfNeeded(tensor, alloc, prepacked_bias_fp32_data_, is_packed);
   }

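Note on the skip_layer_norm.cc change above (illustrative, not part of the commit): the fp16 path now converts whole tensors to fp32 once, outside the parallel loop, and the simplified variant no longer reads a beta input, taking bias from input index 3. A minimal sketch of that index mapping, with a hypothetical struct and helper name:

#include <cstdint>

// Hypothetical helper mirroring the beta/bias selection in Compute() above.
struct SkipLayerNormInputIndices {
  std::int32_t beta;  // -1 means the variant has no beta input
  std::int32_t bias;
};

constexpr SkipLayerNormInputIndices GetOptionalInputIndices(bool simplified) {
  // SkipLayerNormalization:           input(0), skip(1), gamma(2), beta(3), bias(4)
  // SkipSimplifiedLayerNormalization: input(0), skip(1), gamma(2), bias(3)
  return simplified ? SkipLayerNormInputIndices{-1, 3}
                    : SkipLayerNormInputIndices{3, 4};
}

static_assert(GetOptionalInputIndices(true).bias == 3, "simplified: bias at input 3, no beta");
static_assert(GetOptionalInputIndices(false).beta == 3 && GetOptionalInputIndices(false).bias == 4,
              "non-simplified: beta at input 3, bias at input 4");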
onnxruntime/core/providers/qnn/builder/opbuilder/layer_norm_op_builder.cc

+3 -3
@@ -87,10 +87,10 @@ Status LayerNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
     ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[BIAS_IDX], logger, input_names));
   }
 
-#if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 17)
+#if QNN_API_VERSION_MAJOR == 2 && QNN_API_VERSION_MINOR >= 17 && QNN_API_VERSION_MINOR <= 20
   if (!has_bias_input && IsNpuBackend(qnn_model_wrapper.GetQnnBackendType())) {
-    // Bias is implicit. QNN SDK 2.24+ (QNN API version 2.17+) has a validation bug for implicit bias inputs,
-    // so provide an explicit bias of all 0 (quantized int32).
+    // Bias is implicit. QNN SDK 2.24 to 2.27 (QNN API version 2.17 to 2.20) has a validation bug for
+    // implicit bias inputs, so provide an explicit bias of all 0 (quantized int32).
     TensorInfo x_input_info = {};
     ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[X_IDX], x_input_info));

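The tightened #if above turns an open-ended workaround into one bounded to a known-bad SDK window. A minimal sketch of the same bounded-guard pattern, using hypothetical macro names rather than the real QNN ones:

// Hypothetical version macros standing in for QNN_API_VERSION_MAJOR / _MINOR.
// Per the comment in the diff, QNN API 2.17-2.20 corresponds to QNN SDK 2.24-2.27,
// the only window where the explicit zero-bias workaround is still applied.
#define MY_API_VERSION_MAJOR 2
#define MY_API_VERSION_MINOR 19

#if MY_API_VERSION_MAJOR == 2 && MY_API_VERSION_MINOR >= 17 && MY_API_VERSION_MINOR <= 20
constexpr bool kAddExplicitZeroBias = true;   // inside the affected SDK window
#else
constexpr bool kAddExplicitZeroBias = false;  // outside the window, no workaround needed
#endif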
onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

+9 -1
@@ -14,6 +14,7 @@
 #include "DSP/QnnDspCommon.h"
 #include "HTP/QnnHtpCommon.h"
 #include "HTP/QnnHtpContext.h"
+#include "Saver/QnnSaver.h"
 #include <gsl/gsl>
 #include "core/framework/endian_utils.h"
 #include "core/common/logging/capture.h"
@@ -1040,7 +1041,14 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() {
   const QnnProfile_EventId_t* profile_events{nullptr};
   uint32_t num_events{0};
   Qnn_ErrorHandle_t result = qnn_interface_.profileGetEvents(profile_backend_handle_, &profile_events, &num_events);
-  ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get profile events. Error: ", QnnErrorHandleToString(result));
+  if (!qnn_saver_path_.empty()) { // Using QNN Saver backend
+    // QNN SDK 2.28.2 returns QNN_SAVER_ERROR_DUMMY_RETVALUE, but previous QNN versions return QNN_PROFILE_NO_ERROR.
+    // We accept both values.
+    ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result && QNN_SAVER_ERROR_DUMMY_RETVALUE != result,
+                  "Failed to get profile events. Error: ", QnnErrorHandleToString(result));
+  } else {
+    ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get profile events. Error: ", QnnErrorHandleToString(result));
+  }
 
   if (num_events > 0) {
     LOGS(*logger_, VERBOSE) << "profile_events: " << profile_events << " num_events: " << num_events;

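The profiling change above accepts the Saver backend's dummy return code only when a saver path is configured. A standalone sketch of that acceptance rule; the constants and function below are stand-ins for illustration, not the QNN or ORT API:

#include <cstdint>
#include <string>

// Stand-ins for QNN_PROFILE_NO_ERROR and QNN_SAVER_ERROR_DUMMY_RETVALUE.
constexpr std::uint32_t kProfileNoError = 0;
constexpr std::uint32_t kSaverDummyRetval = 0x2001;

// True when a profileGetEvents result should be treated as success: plain
// success always; the Saver dummy value only when the Saver backend is in use
// (QNN SDK 2.28.2 returns the dummy value there, older SDKs return no-error,
// and the diff above accepts both).
bool ProfileEventsResultOk(std::uint32_t result, const std::string& qnn_saver_path) {
  if (result == kProfileNoError) return true;
  return !qnn_saver_path.empty() && result == kSaverDummyRetval;
}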
onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

+56 -18
@@ -1725,6 +1725,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
     runtime_ = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(GetTensorrtLogger(detailed_build_log_)));
   }
 
+  trt_version_ = getInferLibVersion();
+
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT version is " << trt_version_;
+
   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
                         << "device_id: " << device_id_
                         << ", trt_max_partition_iterations: " << max_partition_iterations_
@@ -2462,10 +2466,30 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   std::vector<size_t> nodes_vector(number_of_ort_nodes);
   std::iota(std::begin(nodes_vector), std::end(nodes_vector), 0);
 
-  std::vector<size_t> filtered_nodes_vector;
+  std::set<std::string> exclude_ops_set;
+
+  /*
+   * There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10.
+   * TRT EP automatically excludes DDS ops from running on TRT.
+   */
+  if (trt_version_ >= 100000 && trt_version_ < 110000) {
+    exclude_ops_set.insert("NonMaxSuppression");
+    exclude_ops_set.insert("NonZero");
+    exclude_ops_set.insert("RoiAlign");
+    LOGS_DEFAULT(VERBOSE) << "There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10. TRT EP automatically excludes DDS ops from running on TRT, if applicable";
+  }
+
+  SubGraphCollection_t parser_nodes_vector, supported_nodes_vector;
   const std::vector<NodeIndex>& node_index = graph.GetNodesInTopologicalOrder(1 /*priority-based topological sort*/);
+  bool new_subgraph = true;
+
+  /* Iterate all the nodes and exclude the node if:
+   * 1. It's a control flow op and its subgraph(s) is not fully TRT eligible.
+   * 2. It's a DDS op.
+   */
   for (const auto& index : nodes_vector) {
     const auto& node = graph.GetNode(node_index[index]);
+    bool supported_node = true;
 
     /* If current node is control flow op, we take different approach based on following four cases:
      *
@@ -2477,29 +2501,43 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
      * For cases 2, 3, 4, even though the control flow op is not assigned to TRT, any portion of its subgraphs that can run in TRT will be still fused and assigned to TRT EP.
      */
     if (control_flow_op_set_.find(node->OpType()) != control_flow_op_set_.end()) {
-      auto sub_graphs = node->GetSubgraphs();
-      if (sub_graphs.size() != 0) {
-        bool all_subgraphs_are_supported = true;
-        for (auto sub_graph : sub_graphs) {
-          // TRT EP should consider the empty subgraph is fully supported by TRT.
-          if (sub_graph->CreateGraphViewer()->NumberOfNodes() == 0) {
-            continue;
-          }
-          if (!AllNodesAssignedToSpecificEP(*(sub_graph->CreateGraphViewer()), kTensorrtExecutionProvider)) {
-            all_subgraphs_are_supported = false;
-            break;
+      auto supported_control_flow_op = [&](const Node* node) {
+        auto sub_graphs = node->GetSubgraphs();
+        if (sub_graphs.size() != 0) {
+          for (auto sub_graph : sub_graphs) {
+            // TRT EP should consider the empty subgraph is fully supported by TRT.
+            if (sub_graph->CreateGraphViewer()->NumberOfNodes() == 0) {
+              continue;
+            }
+            if (!AllNodesAssignedToSpecificEP(*(sub_graph->CreateGraphViewer()), kTensorrtExecutionProvider)) {
+              // if not all its subgraphs are supported, we need to exclude this control flow op
+              return false;
+            }
           }
         }
-        if (!all_subgraphs_are_supported) {
-          // if not all its subgraphs are supported, we need to exclude this control flow op
-          continue;
-        }
+        return true;
+      };
+      supported_node = supported_control_flow_op(node);
+    }
+
+    // Exclude any ops, if applicable
+    if (exclude_ops_set.find(node->OpType()) != exclude_ops_set.end()) {
+      supported_node = false;
+    }
+
+    if (supported_node) {
+      if (new_subgraph) {
+        parser_nodes_vector.emplace_back();
+        // Mark all new graphs as "UnKnown" which will later be parsed by TRT parser
+        parser_nodes_vector.back().second = false;
+        new_subgraph = false;
       }
+      parser_nodes_vector.back().first.emplace_back(index);
+    } else {
+      new_subgraph = true;
     }
-    filtered_nodes_vector.push_back(index);
   }
 
-  SubGraphCollection_t supported_nodes_vector, parser_nodes_vector = {{filtered_nodes_vector, false}};
   bool early_termination = false;
   supported_nodes_vector = GetSupportedList(parser_nodes_vector, 0, max_partition_iterations_, graph, &early_termination);
   if (early_termination) {

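Reading aid for the GetCapability rewrite above (a sketch, not the TRT EP code): instead of one filtered_nodes_vector, supported nodes are grouped into contiguous runs; an excluded node (a DDS op on TRT 10, or a control flow op whose subgraphs are not fully TRT eligible) ends the current run, and the next supported node starts a new one, so exclusions now split the graph into multiple candidate TRT subgraphs. The helper below mirrors that bookkeeping over plain op-type strings; its name and signature are hypothetical:

#include <cstddef>
#include <functional>
#include <string>
#include <vector>

// Mirrors the new_subgraph / parser_nodes_vector bookkeeping in the diff above,
// but over a plain list of op types. Each inner vector is one candidate subgraph.
std::vector<std::vector<std::size_t>> SplitIntoSupportedRuns(
    const std::vector<std::string>& op_types,
    const std::function<bool(const std::string&)>& is_supported) {
  std::vector<std::vector<std::size_t>> runs;
  bool new_subgraph = true;
  for (std::size_t i = 0; i < op_types.size(); ++i) {
    if (!is_supported(op_types[i])) {
      new_subgraph = true;  // an excluded node terminates the current run
      continue;
    }
    if (new_subgraph) {
      runs.emplace_back();  // the next supported node opens a fresh run
      new_subgraph = false;
    }
    runs.back().push_back(i);
  }
  return runs;
}

// Example: with {"Conv", "NonZero", "Relu"} and NonZero excluded, this yields
// two runs, {0} and {2}: two candidate TRT subgraphs around the excluded op.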
onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h

+4 -0
@@ -329,6 +329,10 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   bool cuda_graph_enable_ = false;
   std::string cache_prefix_;
   bool engine_hw_compatible_ = false;
+  std::string op_types_to_exclude_;
+
+  // The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH
+  int32_t trt_version_;
 
   // The OrtAllocator object will be get during ep compute time
   // and should be kept for the lifetime of TRT EP object.

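The trt_version_ comment above documents the encoding (MAJOR * 100 + MINOR) * 100 + PATCH, which is why the trt_version_ >= 100000 && trt_version_ < 110000 check in tensorrt_execution_provider.cc matches any TRT 10.x release. A worked example of that encoding; the helper is illustrative, not part of the commit:

#include <cstdint>

// Encodes a TensorRT version using the layout documented in the header above:
// (MAJOR * 100 + MINOR) * 100 + PATCH.
constexpr std::int32_t EncodeTrtVersion(int major, int minor, int patch) {
  return (major * 100 + minor) * 100 + patch;
}

static_assert(EncodeTrtVersion(10, 3, 0) == 100300, "TRT 10.3.0 encodes to 100300");
static_assert(EncodeTrtVersion(10, 3, 0) >= 100000 && EncodeTrtVersion(10, 3, 0) < 110000,
              "every 10.x version lands in the DDS-exclusion window");
static_assert(EncodeTrtVersion(8, 6, 1) == 80601, "TRT 8.6.1 stays outside the window");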