diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index e2a8005aba1da..d148c4191d5d7 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -1407,9 +1407,30 @@ std::unique_ptr<IndexedSubGraph> NvExecutionProvider::GetSubGraph(SubGraph_t gra
   }
 
   // Find inputs and outputs of the subgraph
+
   std::unique_ptr<IndexedSubGraph> sub_graph = onnxruntime::IndexedSubGraph::Create();
-  std::unordered_map<const NodeArg*, int> original_inputs, fused_inputs, fused_outputs, fused_outputs_to_add, graph_outputs_to_add;
+  std::unordered_map<const NodeArg*, int> original_inputs;
+
+  // These maps store the inputs and outputs of the subgraph.
+  // Please note that the inputs and outputs of the maps will be dynamically updated during node iteration
+  // to determine the final inputs and outputs of the subgraph.
+  std::unordered_map<const NodeArg*, int> fused_inputs, fused_outputs;
+
+  // This map stores the node's output that will be consumed by another node outside of this subgraph.
+  // So the node's output should be put into the subgraph's output list.
+  std::unordered_map<const NodeArg*, int> fused_outputs_to_add;
+
+  // This map stores the node outputs that are also outputs of the original graph.
+  // So these node outputs should be put into the subgraph's output list.
+  std::unordered_map<const NodeArg*, int> graph_outputs_to_add;
+
   std::unordered_set<const NodeArg*> erased;
+
+  // These counters give every node input or output that is added to the 'fused_inputs', 'fused_outputs',
+  // 'fused_outputs_to_add' and 'graph_outputs_to_add' maps a relative order index.
+  // Items added earlier receive a smaller order index than items added later.
+  // When constructing the final sub_graph's input or output lists, entries with smaller
+  // order indices will appear before those with larger indices.
   int input_order = 0;
   int output_order = 0;
 
@@ -1428,7 +1449,7 @@ std::unique_ptr<IndexedSubGraph> NvExecutionProvider::GetSubGraph(SubGraph_t gra
         erased.insert(input);
       } else if (erased.find(input) == erased.end()) {
         // Only when input is neither in output list nor erased list, add the input to input list
-        fused_inputs[input] = input_order++;
+        fused_inputs.insert({input, input_order++});
       }
     }
 
@@ -1443,7 +1464,7 @@ std::unique_ptr<IndexedSubGraph> NvExecutionProvider::GetSubGraph(SubGraph_t gra
         erased.insert(input);
       } else if (erased.find(input) == erased.end()) {
         // Only when input is neither in output list nor erased list, add the input to input list
-        fused_inputs[input] = input_order++;
+        fused_inputs.insert({input, input_order++});
       }
     }
 
@@ -1464,39 +1485,33 @@ std::unique_ptr<IndexedSubGraph> NvExecutionProvider::GetSubGraph(SubGraph_t gra
         } else {
           output = (it->GetNode()).ImplicitInputDefs()[it->GetDstArgIndex() - static_cast<int>(it->GetNode().InputDefs().size())];
         }
-        if (node_set.find(node_idx) != node_set.end()) {
-          const auto& iter = fused_inputs.find(output);
-          if (iter != fused_inputs.end()) {
-            fused_inputs.erase(iter);
-            erased.insert(output);
-          } else if (erased.find(output) == erased.end()) {
-            if (graph_output_names.find(output->Name()) != graph_output_names.end()) {
-              graph_outputs_to_add[output] = output_order;
-            }
-            fused_outputs[output] = output_order++;
-          }
-        } else {
-          fused_outputs_to_add[output] = output_order++;
+
+        if (node_set.find(node_idx) == node_set.end()) {
+          // This output will be consumed by another node outside of this subgraph.
+          // So the output should be put into the subgraph's output list.
+          fused_outputs_to_add.insert({output, output_order++});
         }
       }
-    } else {
-      for (const auto& output : node->OutputDefs()) {
-        const auto& it = fused_inputs.find(output);
-        if (it != fused_inputs.end()) {
-          fused_inputs.erase(it);
-          erased.insert(output);
-        }
-        // Only when output is neither in input list nor erased list, and the output is consumed by another node, add the output to output list
-        else if (erased.find(output) == erased.end()) {
-          if (graph_output_names.find(output->Name()) != graph_output_names.end()) {
-            graph_outputs_to_add[output] = output_order;
-          }
+    }
 
-          if (graph.GetGraph().GetConsumerNodes(output->Name()).size() > 0) {
-            fused_outputs[output] = output_order++;
-          }
+    for (const auto& output : node->OutputDefs()) {
+      const auto& it = fused_inputs.find(output);
+      if (it != fused_inputs.end()) {
+        fused_inputs.erase(it);
+        erased.insert(output);
+      } else if (erased.find(output) == erased.end()) {
+        if (graph.GetGraph().GetConsumerNodes(output->Name()).size() > 0) {
+          // Only when output is neither in input list nor erased list,
+          // and the output is consumed by another node, add the output to output list
+          fused_outputs.insert({output, output_order++});
         }
       }
+
+      if (graph_output_names.find(output->Name()) != graph_output_names.end()) {
+        // This output is the graph's output.
+        // So the output should be put into the subgraph's output list.
+        graph_outputs_to_add.insert({output, output_order++});
+      }
     }
   }
 
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index cd0c0e4bffdb5..e5b48da33fbc3 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -2035,9 +2035,30 @@ std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph
   }
 
   // Find inputs and outputs of the subgraph
+
   std::unique_ptr<IndexedSubGraph> sub_graph = onnxruntime::IndexedSubGraph::Create();
-  std::unordered_map<const NodeArg*, int> original_inputs, fused_inputs, fused_outputs, fused_outputs_to_add, graph_outputs_to_add;
+  std::unordered_map<const NodeArg*, int> original_inputs;
+
+  // These maps store the inputs and outputs of the subgraph.
+  // Please note that the inputs and outputs of the maps will be dynamically updated during node iteration
+  // to determine the final inputs and outputs of the subgraph.
+  std::unordered_map<const NodeArg*, int> fused_inputs, fused_outputs;
+
+  // This map stores the node's output that will be consumed by another node outside of this subgraph.
+  // So the node's output should be put into the subgraph's output list.
+  std::unordered_map<const NodeArg*, int> fused_outputs_to_add;
+
+  // This map stores the node outputs that are also outputs of the original graph.
+  // So these node outputs should be put into the subgraph's output list.
+  std::unordered_map<const NodeArg*, int> graph_outputs_to_add;
+
   std::unordered_set<const NodeArg*> erased;
+
+  // These counters give every node input or output that is added to the 'fused_inputs', 'fused_outputs',
+  // 'fused_outputs_to_add' and 'graph_outputs_to_add' maps a relative order index.
+  // Items added earlier receive a smaller order index than items added later.
+  // When constructing the final sub_graph's input or output lists, entries with smaller
+  // order indices will appear before those with larger indices.
   int input_order = 0;
   int output_order = 0;
 
@@ -2056,7 +2077,7 @@ std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph
         erased.insert(input);
       } else if (erased.find(input) == erased.end()) {
         // Only when input is neither in output list nor erased list, add the input to input list
-        fused_inputs[input] = input_order++;
+        fused_inputs.insert({input, input_order++});
       }
     }
 
@@ -2071,7 +2092,7 @@ std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph
         erased.insert(input);
       } else if (erased.find(input) == erased.end()) {
         // Only when input is neither in output list nor erased list, add the input to input list
-        fused_inputs[input] = input_order++;
+        fused_inputs.insert({input, input_order++});
       }
     }
 
@@ -2092,39 +2113,33 @@ std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph
         } else {
           output = (it->GetNode()).ImplicitInputDefs()[it->GetDstArgIndex() - static_cast<int>(it->GetNode().InputDefs().size())];
         }
-        if (node_set.find(node_idx) != node_set.end()) {
-          const auto& iter = fused_inputs.find(output);
-          if (iter != fused_inputs.end()) {
-            fused_inputs.erase(iter);
-            erased.insert(output);
-          } else if (erased.find(output) == erased.end()) {
-            if (graph_output_names.find(output->Name()) != graph_output_names.end()) {
-              graph_outputs_to_add[output] = output_order;
-            }
-            fused_outputs[output] = output_order++;
-          }
-        } else {
-          fused_outputs_to_add[output] = output_order++;
+
+        if (node_set.find(node_idx) == node_set.end()) {
+          // This output will be consumed by another node outside of this subgraph.
+          // So the output should be put into the subgraph's output list.
+          fused_outputs_to_add.insert({output, output_order++});
         }
       }
-    } else {
-      for (const auto& output : node->OutputDefs()) {
-        const auto& it = fused_inputs.find(output);
-        if (it != fused_inputs.end()) {
-          fused_inputs.erase(it);
-          erased.insert(output);
-        }
-        // Only when output is neither in input list nor erased list, and the output is consumed by another node, add the output to output list
-        else if (erased.find(output) == erased.end()) {
-          if (graph_output_names.find(output->Name()) != graph_output_names.end()) {
-            graph_outputs_to_add[output] = output_order;
-          }
+    }
 
-          if (graph.GetGraph().GetConsumerNodes(output->Name()).size() > 0) {
-            fused_outputs[output] = output_order++;
-          }
+    for (const auto& output : node->OutputDefs()) {
+      const auto& it = fused_inputs.find(output);
+      if (it != fused_inputs.end()) {
+        fused_inputs.erase(it);
+        erased.insert(output);
+      } else if (erased.find(output) == erased.end()) {
+        if (graph.GetGraph().GetConsumerNodes(output->Name()).size() > 0) {
+          // Only when output is neither in input list nor erased list,
+          // and the output is consumed by another node, add the output to output list
+          fused_outputs.insert({output, output_order++});
         }
       }
+
+      if (graph_output_names.find(output->Name()) != graph_output_names.end()) {
+        // This output is the graph's output.
+        // So the output should be put into the subgraph's output list.
+        graph_outputs_to_add.insert({output, output_order++});
+      }
     }
   }
 
diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc
index d8cc56d738175..af9706855ee3c 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc
@@ -203,6 +203,48 @@ TEST_P(TypeTests, IOTypes) {
   }
 }
 
+TEST(NvExecutionProviderTest, TestSessionOutputs) {
+  /*
+   * Model #1: the session must expose all four graph outputs.
+   *
+   * "input" ---> TopK ---
+   *                     |---> "scores"
+   *                     |--- Less ---> "Less_output_0"
+   *                     |--- Div ---> "Div_17_output_0"
+   *                     |--- Mod ---> "labels"
+   */
+  {
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {});
+
+    auto model_path = ORT_TSTR("testdata/topk_and_multiple_graph_outputs.onnx");
+    Ort::Session session(*ort_env, model_path, session_options);
+
+    size_t output_count = session.GetOutputCount();
+    ASSERT_EQ(output_count, 4U);  // ASSERT_EQ reports the actual count on failure
+  }
+
+  /*
+   * Model #2: the unused "dropout_mask" output must not be exposed, so "Y" is the only session output.
+   *
+   * "X" ---> Dropout ---> MatMul ---> "Y"
+   *             |           ^
+   *             |           |
+   *             |    "W" ----
+   *             ----> "dropout_mask" (not consumed; can't be graph's output)
+   */
+  {
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider(kNvTensorRTRTXExecutionProvider, {});
+
+    auto model_path = ORT_TSTR("testdata/node_output_not_used.onnx");
+    Ort::Session session(*ort_env, model_path, session_options);
+
+    size_t output_count = session.GetOutputCount();
+    ASSERT_EQ(output_count, 1U);  // ASSERT_EQ reports the actual count on failure
+  }
+}
+
 INSTANTIATE_TEST_SUITE_P(NvExecutionProviderTest, TypeTests,
                          ::testing::Values(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
                                            ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16,
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 6a6545c68cb4f..dce0d570ec238 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -1,5 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+#include "onnxruntime_cxx_api.h"
 #include "core/graph/onnx_protobuf.h"
 #include "core/session/inference_session.h"
 #include "test/providers/provider_test_utils.h"
@@ -18,6 +19,8 @@ using namespace std;
 using namespace ONNX_NAMESPACE;
 using namespace ::onnxruntime::logging;
 
+extern std::unique_ptr<Ort::Env> ort_env;
+
 namespace onnxruntime {
 
 namespace test {
@@ -1360,5 +1363,49 @@ TEST(TensorrtExecutionProviderTest, RemoveCycleTest) {
   ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &fetches));
   VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m);
 }
+
+TEST(TensorrtExecutionProviderTest, TestSessionOutputs) {
+  /*
+   * Model #1: the session must expose all four graph outputs.
+   *
+   * "input" ---> TopK ---
+   *                     |---> "scores"
+   *                     |--- Less ---> "Less_output_0"
+   *                     |--- Div ---> "Div_17_output_0"
+   *                     |--- Mod ---> "labels"
+   */
+  {
+    OrtTensorRTProviderOptionsV2 provider_options;
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_TensorRT_V2(provider_options);
+
+    auto model_path = ORT_TSTR("testdata/topk_and_multiple_graph_outputs.onnx");
+    Ort::Session session(*ort_env, model_path, session_options);
+
+    size_t output_count = session.GetOutputCount();
+    ASSERT_EQ(output_count, 4U);  // ASSERT_EQ reports the actual count on failure
+  }
+
+  /*
+   * Model #2: the unused "dropout_mask" output must not be exposed, so "Y" is the only session output.
+   *
+   * "X" ---> Dropout ---> MatMul ---> "Y"
+   *             |           ^
+   *             |           |
+   *             |    "W" ----
+   *             ----> "dropout_mask" (not consumed; can't be graph's output)
+   */
+  {
+    OrtTensorRTProviderOptionsV2 provider_options;
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_TensorRT_V2(provider_options);
+
+    auto model_path = ORT_TSTR("testdata/node_output_not_used.onnx");
+    Ort::Session session(*ort_env, model_path, session_options);
+
+    size_t output_count = session.GetOutputCount();
+    ASSERT_EQ(output_count, 1U);  // ASSERT_EQ reports the actual count on failure
+  }
+}
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/testdata/node_output_not_used.onnx b/onnxruntime/test/testdata/node_output_not_used.onnx
new file mode 100644
index 0000000000000..e2726182fddc2
Binary files /dev/null and b/onnxruntime/test/testdata/node_output_not_used.onnx differ
diff --git a/onnxruntime/test/testdata/node_output_not_used.py b/onnxruntime/test/testdata/node_output_not_used.py
new file mode 100644
index 0000000000000..d36d5e9cfd2f8
--- /dev/null
+++ b/onnxruntime/test/testdata/node_output_not_used.py
@@ -0,0 +1,43 @@
+import onnx
+from onnx import TensorProto, helper
+
+
+def create_model_with_node_output_not_used(model_path):
+    """Save a Dropout -> MatMul model whose Dropout mask output is unused to model_path."""
+    x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 2])
+    w = helper.make_tensor_value_info("W", TensorProto.FLOAT, [2, 3])
+    y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 3])  # [3, 2] x [2, 3] -> [3, 3]
+
+    # Dropout node (two outputs): "dropout_mask" is neither consumed by a node nor a graph output.
+    dropout_node = helper.make_node(
+        "Dropout",
+        inputs=["X"],
+        outputs=["dropout_out", "dropout_mask"],
+        name="DropoutNode",
+    )
+
+    # MatMul node: multiplies Dropout's first output by the graph input "W".
+    matmul_node = helper.make_node(
+        "MatMul",
+        inputs=["dropout_out", "W"],
+        outputs=["Y"],
+        name="MatMulNode",
+    )
+
+    graph = helper.make_graph(
+        nodes=[dropout_node, matmul_node],
+        name="DropoutMatMulGraph",
+        inputs=[x, w],
+        outputs=[y],
+    )
+
+    model = helper.make_model(graph, opset_imports=[helper.make_operatorsetid("", 13)])
+
+    onnx.checker.check_model(model)
+    onnx.save(model, model_path)
+
+    print(f"Model saved to: {model_path}")
+
+
+if __name__ == "__main__":
+    create_model_with_node_output_not_used("node_output_not_used.onnx")
diff --git a/onnxruntime/test/testdata/topk_and_multiple_graph_outputs.onnx b/onnxruntime/test/testdata/topk_and_multiple_graph_outputs.onnx
new file mode 100644
index 0000000000000..340c3d420d574
Binary files /dev/null and b/onnxruntime/test/testdata/topk_and_multiple_graph_outputs.onnx differ
diff --git a/onnxruntime/test/testdata/topk_and_multiple_graph_outputs.py b/onnxruntime/test/testdata/topk_and_multiple_graph_outputs.py
new file mode 100644
index 0000000000000..232abb2ed9163
--- /dev/null
+++ b/onnxruntime/test/testdata/topk_and_multiple_graph_outputs.py
@@ -0,0 +1,57 @@
+import onnx
+from onnx import TensorProto, helper
+
+
+def create_model_with_topk_graph_output(model_path):
+    """Build the TopK test model with four graph outputs and save it to ``model_path``.
+
+    "scores" comes straight from TopK; "Less_output_0", "Div_17_output_0" and "labels" are
+    produced by Less, Div and Mod nodes that all consume TopK's "topk_indices" output.
+    """
+    int64 = TensorProto.INT64
+
+    # Graph input: a 1-D float tensor with the symbolic length "N".
+    graph_inputs = [helper.make_tensor_value_info("input", TensorProto.FLOAT, ["N"])]
+
+    # Initializers: TopK's K and the constant second operands of Less and Div/Mod.
+    initializers = [
+        helper.make_tensor("K", int64, dims=[1], vals=[300]),
+        helper.make_tensor("zero", int64, dims=[], vals=[0]),
+        helper.make_tensor("twenty_six", int64, dims=[], vals=[26]),
+    ]
+
+    # Nodes: TopK followed by the three consumers of its indices output.
+    nodes = [
+        helper.make_node("TopK", inputs=["input", "K"], outputs=["scores", "topk_indices"], name="TopK"),
+        helper.make_node("Less", inputs=["topk_indices", "zero"], outputs=["Less_output_0"], name="Less"),
+        helper.make_node("Div", inputs=["topk_indices", "twenty_six"], outputs=["Div_17_output_0"], name="Div"),
+        helper.make_node("Mod", inputs=["topk_indices", "twenty_six"], outputs=["labels"], name="Mod"),
+    ]
+
+    # Graph outputs: (name, element type) pairs, each declared with the symbolic shape ["K"].
+    output_specs = [
+        ("scores", TensorProto.FLOAT),
+        ("Less_output_0", TensorProto.BOOL),
+        ("Div_17_output_0", int64),
+        ("labels", int64),
+    ]
+    graph_outputs = [helper.make_tensor_value_info(name, elem_type, ["K"]) for name, elem_type in output_specs]
+
+    graph = helper.make_graph(
+        nodes=nodes,
+        name="TopKGraph",
+        inputs=graph_inputs,
+        outputs=graph_outputs,
+        initializer=initializers,
+    )
+    model = helper.make_model(graph, opset_imports=[helper.make_operatorsetid("", 13)])
+
+    # Validate + Save
+    onnx.checker.check_model(model)
+    onnx.save(model, model_path)
+
+    print(f"Model saved to: {model_path}")
+
+
+if __name__ == "__main__":
+    create_model_with_topk_graph_output("topk_and_multiple_graph_outputs.onnx")