You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
### Description
<!-- Describe your changes. -->
All three PRs are cherry-picked in this round:
1. [Refactor SkipLayerNorm and handle beta properly (#22862)](#22862)
2. [[TensorRT EP] Exclude DDS ops from running on TRT (#22875)](#22875)
3. [[QNN EP] QNN SDK 2.28.2 (#22844)](#22844)
### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
---------
Signed-off-by: Liqun Fu <[email protected]>
Signed-off-by: Liqun Fu <[email protected]>
Co-authored-by: Chi Lo <[email protected]>
Co-authored-by: liqun Fu <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Adrian Lizarraga <[email protected]>
* There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10.
2473
+
* TRT EP automatically excludes DDS ops from running on TRT.
2474
+
*/
2475
+
if (trt_version_ >= 100000 && trt_version_ < 110000) {
2476
+
exclude_ops_set.insert("NonMaxSuppression");
2477
+
exclude_ops_set.insert("NonZero");
2478
+
exclude_ops_set.insert("RoiAlign");
2479
+
LOGS_DEFAULT(VERBOSE) << "There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10. TRT EP automatically excludes DDS ops from running on TRT, if applicable";
* For cases 2, 3, 4, even though the control flow op is not assigned to TRT, any portion of its subgraphs that can run in TRT will still be fused and assigned to TRT EP.
2478
2502
*/
2479
2503
if (control_flow_op_set_.find(node->OpType()) != control_flow_op_set_.end()) {
2480
-
auto sub_graphs = node->GetSubgraphs();
2481
-
if (sub_graphs.size() != 0) {
2482
-
bool all_subgraphs_are_supported = true;
2483
-
for (auto sub_graph : sub_graphs) {
2484
-
// TRT EP should consider the empty subgraph is fully supported by TRT.
2485
-
if (sub_graph->CreateGraphViewer()->NumberOfNodes() == 0) {
2486
-
continue;
2487
-
}
2488
-
if (!AllNodesAssignedToSpecificEP(*(sub_graph->CreateGraphViewer()), kTensorrtExecutionProvider)) {
2489
-
all_subgraphs_are_supported = false;
2490
-
break;
2504
+
auto supported_control_flow_op = [&](const Node* node) {
2505
+
auto sub_graphs = node->GetSubgraphs();
2506
+
if (sub_graphs.size() != 0) {
2507
+
for (auto sub_graph : sub_graphs) {
2508
+
// TRT EP should consider the empty subgraph is fully supported by TRT.
2509
+
if (sub_graph->CreateGraphViewer()->NumberOfNodes() == 0) {
2510
+
continue;
2511
+
}
2512
+
if (!AllNodesAssignedToSpecificEP(*(sub_graph->CreateGraphViewer()), kTensorrtExecutionProvider)) {
2513
+
// if not all its subgraphs are supported, we need to exclude this control flow op
2514
+
return false;
2515
+
}
2491
2516
}
2492
2517
}
2493
-
if (!all_subgraphs_are_supported) {
2494
-
// if not all its subgraphs are supported, we need to exclude this control flow op
2495
-
continue;
2496
-
}
2518
+
return true;
2519
+
};
2520
+
supported_node = supported_control_flow_op(node);
2521
+
}
2522
+
2523
+
// Exclude any ops, if applicable
2524
+
if (exclude_ops_set.find(node->OpType()) != exclude_ops_set.end()) {
2525
+
supported_node = false;
2526
+
}
2527
+
2528
+
if (supported_node) {
2529
+
if (new_subgraph) {
2530
+
parser_nodes_vector.emplace_back();
2531
+
// Mark all new graphs as "UnKnown" which will later be parsed by TRT parser
0 commit comments