Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 74 additions & 6 deletions onnxruntime/test/framework/session_state_test.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <fstream>
#include <iostream>
#include <absl/base/config.h>

Expand All @@ -12,6 +13,7 @@
#include "core/framework/op_kernel.h"
#include "core/framework/bfc_arena.h"
#include "core/framework/ep_context_options.h"
#include "core/framework/resource_accountant.h"
#include "core/framework/session_state.h"
#include "core/graph/graph_utils.h"
#include "core/graph/graph_viewer.h"
Expand Down Expand Up @@ -414,6 +416,45 @@ namespace {

using ParitionVerifierFn = std::function<void(const Graph&)>;

// Collect unique node names from a graph and all its subgraphs
// using the same naming scheme as the resource accountant.
// Recursively gathers the resource-accountant unique name of every node in
// `graph`, descending into any subgraphs attached to a node's attributes
// (e.g. If/Loop/Scan bodies). Names are appended to `names` in traversal order.
static void CollectNodeNames(const Graph& graph, std::vector<std::string>& names) {
  for (const auto& current_node : graph.Nodes()) {
    names.emplace_back(IResourceAccountant::MakeUniqueNodeName(current_node));
    const auto& attribute_subgraphs = current_node.GetAttributeNameToSubgraphMap();
    for (const auto& attr_and_subgraph : attribute_subgraphs) {
      CollectNodeNames(*attr_and_subgraph.second, names);
    }
  }
}

// Generates a node stats file dynamically from the current graph,
// assigning each node a fixed cost. Returns the total cost across
// all nodes so callers can choose a threshold relative to the actual total.
// This avoids relying on a pre-baked stats file whose node name hashes
// become stale when graph optimizers change node input/output names.
static size_t GenerateDynamicNodeStatsFile(const ORTCHAR_T* model_path,
const std::filesystem::path& output_path,
size_t cost_per_node = 1024) {
const auto& default_logger = DefaultLoggingManager().DefaultLogger();
std::shared_ptr<onnxruntime::Model> model;
EXPECT_STATUS_OK(Model::Load(model_path, model, nullptr, default_logger));
Graph& graph = model->MainGraph();
EXPECT_STATUS_OK(graph.Resolve());

std::vector<std::string> node_names;
CollectNodeNames(graph, node_names);

std::ofstream ofs(output_path);
EXPECT_TRUE(ofs.is_open());
ofs << "#name,input_sizes,initializers_sizes,total_dynamic_sizes,total_temp_allocations\n";
for (const auto& name : node_names) {
ofs << name << "," << cost_per_node << ",0,0,0\n";
}
ofs.close();

return node_names.size() * cost_per_node;
}

void LoadWithResourceAwarePartitioning(const ORTCHAR_T* model_path,
const SessionOptions& sess_options,
const ParitionVerifierFn& verifier_fn) {
Expand Down Expand Up @@ -484,37 +525,62 @@ TEST(SessionStateTest, TestResourceAwarePartitioning_NoLimit) {

TEST(SessionStateTest, TestResourceAwarePartitioning_LargeLimit) {
  constexpr const ORTCHAR_T* model_path = ORT_TSTR("testdata/transformers/tiny_gpt2_beamsearch.onnx");
  const std::filesystem::path stats_path =
      std::filesystem::temp_directory_path() / "tiny_gpt2_beamsearch_dynamic_stats_large.txt";

  // Generate node stats dynamically so names always match the current graph.
  constexpr size_t cost_per_node = 1024;
  const size_t total_cost = GenerateDynamicNodeStatsFile(model_path, stats_path, cost_per_node);
  ASSERT_GT(total_cost, 0U);

  // Use a limit much larger than total cost so all nodes are assigned CUDA.
  // Pass the absolute path as the stats filename — LoadNodeAllocationStats resolves
  // it relative to the model directory, but an absolute path replaces the prefix.
  const size_t large_limit_kb = (total_cost * 2) / 1024 + 1;
  const std::string limit_setting = std::to_string(large_limit_kb) + "," + stats_path.string();

  SessionOptions sess_options;
  sess_options.enable_mem_pattern = false;
  sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL;
  sess_options.use_deterministic_compute = false;
  sess_options.enable_mem_reuse = false;
  ASSERT_STATUS_OK(sess_options.config_options.AddConfigEntry(
      kOrtSessionOptionsResourceCudaPartitioningSettings, limit_setting.c_str()));

  LoadWithResourceAwarePartitioning(model_path, sess_options, [](const Graph& graph) {
    // With a limit above the total cost, every node stays on the CUDA EP.
    for (const auto& node : graph.Nodes()) {
      EXPECT_EQ(node.GetExecutionProviderType(), kCudaExecutionProvider);
    }
  });

  // Clean up the temp stats file so reruns start fresh.
  std::filesystem::remove(stats_path);
}

TEST(SessionStateTest, TestResourceAwarePartitioning_CPUOffloaded) {
constexpr const ORTCHAR_T* model_path = ORT_TSTR("testdata/transformers/tiny_gpt2_beamsearch.onnx");
constexpr const char* limit_setting = "5000,tiny_gpt2_beamsearch_node_stats.txt";
const std::filesystem::path stats_path =
std::filesystem::temp_directory_path() / "tiny_gpt2_beamsearch_dynamic_stats_offload.txt";

// Generate node stats dynamically so names always match the current graph.
// Use a non-trivial cost per node so the threshold math works cleanly.
constexpr size_t cost_per_node = 1024;
size_t total_cost = GenerateDynamicNodeStatsFile(model_path, stats_path, cost_per_node);
ASSERT_GT(total_cost, 0U);

// Set threshold to half the total cost so some nodes must be offloaded to CPU.
// Pass the absolute path as the stats filename.
size_t half_limit_kb = (total_cost / 2) / 1024;
ASSERT_GT(half_limit_kb, 0U);
std::string limit_setting = std::to_string(half_limit_kb) + "," + stats_path.string();

// Limit is below the total cost, so at least one node must be offloaded to CPU
SessionOptions sess_options;
sess_options.enable_mem_pattern = false;
sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL;
sess_options.use_deterministic_compute = false;
sess_options.enable_mem_reuse = false;
ASSERT_STATUS_OK(sess_options.config_options.AddConfigEntry(
kOrtSessionOptionsResourceCudaPartitioningSettings, limit_setting));
kOrtSessionOptionsResourceCudaPartitioningSettings, limit_setting.c_str()));

LoadWithResourceAwarePartitioning(model_path, sess_options, [](const Graph& graph) {
const auto& graph_nodes = graph.Nodes();
Expand All @@ -527,6 +593,8 @@ TEST(SessionStateTest, TestResourceAwarePartitioning_CPUOffloaded) {
}
EXPECT_TRUE(cpu_node_found);
});

std::filesystem::remove(stats_path);
}

#endif // USE_CUDA
Expand Down
Loading