Skip to content

Commit

Permalink
fix transpose optimizer on GPU EP (#15988)
Browse files Browse the repository at this point in the history
### Description
Because of #15618, the default allocator changed to the device allocator,
which will be a GPU allocator instead of a CPU one. In the transpose optimizer
we expect to read data from initializers, so a CPU allocator is required here.

This change fixes the transpose optimizer on the GPU EP.

Fixes the issues referred to in #15869 and #15796.
  • Loading branch information
fs-eire authored May 19, 2023
1 parent 4324d21 commit dc06c25
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -911,9 +911,8 @@ PostLayoutTransformCostCheck(const api::GraphRef& graph, const api::NodeRef& nod
}

Status TransformLayoutForEP(Graph& graph, bool& modified, const IExecutionProvider& execution_provider,
const DebugGraphFn& debug_graph_fn) {
// sub graph recurse will be added later
auto api_graph = MakeApiGraph(graph, execution_provider.GetAllocator(OrtMemTypeDefault), nullptr);
AllocatorPtr cpu_allocator, const DebugGraphFn& debug_graph_fn) {
auto api_graph = MakeApiGraph(graph, cpu_allocator, nullptr);
const auto& layout_sensitive_ops = GetORTLayoutSensitiveOps();

for (auto& node : api_graph->Nodes()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,12 @@ const std::unordered_set<std::string_view>& GetORTLayoutSensitiveOps();
/// <param name="graph">graph to transform</param>
/// <param name="modified">indicates whether the graph is modified during transformation</param>
/// <param name="execution_provider">execution provider for which the transformation needs to be performed</param>
/// <param name="cpu_allocator">a CPU allocator used in layout transformation.</param>
/// <param name="debug_graph_fn">Optional functor to debug the graph produced during layout transformation.
/// This is called after layout transformation if new nodes are inserted, and again after those are optimized.
/// </param>
Status TransformLayoutForEP(Graph& graph, bool& modified, const IExecutionProvider& execution_provider,
const DebugGraphFn& debug_graph_fn = {});
AllocatorPtr cpu_allocator, const DebugGraphFn& debug_graph_fn = {});

/// <summary>
/// Checks if the opset of the Graph is supported by the layout transformer.
Expand Down
18 changes: 12 additions & 6 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,7 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
const layout_transformer::DebugGraphFn& debug_graph_fn) -> Status {
ORT_RETURN_IF_ERROR_SESSIONID_(
layout_transformer::TransformLayoutForEP(graph_to_transform, modified, execution_provider,
debug_graph_fn));
execution_providers_.GetDefaultCpuAllocator(), debug_graph_fn));

if (modified) {
ORT_RETURN_IF_ERROR_SESSIONID_(
Expand Down Expand Up @@ -1266,13 +1266,19 @@ Status PartitionOrtFormatModel(onnxruntime::Graph& graph,
const ExecutionProviders& providers,
KernelRegistryManager& kernel_registry_manager,
SessionState& session_state) {
layout_transformer::TransformLayoutFunction transform_layout_fn = nullptr;

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
// only provide NCHW to NHWC layout transformer if supported
layout_transformer::TransformLayoutFunction transform_layout_fn = layout_transformer::IsSupportedOpset(graph)
? layout_transformer::TransformLayoutForEP
: nullptr;
#else // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
layout_transformer::TransformLayoutFunction transform_layout_fn{};
if (layout_transformer::IsSupportedOpset(graph)) {
transform_layout_fn =
[&providers](Graph& graph_to_transform, bool& modified,
const IExecutionProvider& execution_provider,
const layout_transformer::DebugGraphFn& debug_graph_fn) -> Status {
return layout_transformer::TransformLayoutForEP(graph_to_transform, modified, execution_provider,
providers.GetDefaultCpuAllocator(), debug_graph_fn);
};
}
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

GraphPartitioner partitioner(kernel_registry_manager, providers);
Expand Down
18 changes: 15 additions & 3 deletions onnxruntime/test/framework/session_state_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,11 @@ TEST_P(SessionStateTestP, TestInitializerProcessing) {

GraphPartitioner partitioner(krm, execution_providers);
status = partitioner.Partition(graph, session_state.GetMutableFuncMgr(),
layout_transformer::TransformLayoutForEP);
[&execution_providers](Graph& graph, bool& modified,
const IExecutionProvider& execution_provider,
const layout_transformer::DebugGraphFn& debug_graph_fn) -> Status {
return layout_transformer::TransformLayoutForEP(graph, modified, execution_provider, execution_providers.GetDefaultCpuAllocator(), debug_graph_fn);
});
ASSERT_TRUE(status.IsOK()) << status;

ASSERT_STATUS_OK(session_state.FinalizeSessionState(oss.str(), krm));
Expand Down Expand Up @@ -231,7 +235,11 @@ TEST(SessionStateTest, TestInitializerMemoryAllocatedUsingNonArenaMemory) {
// Partition the graph
GraphPartitioner partitioner(krm, execution_providers);
status = partitioner.Partition(graph, session_state.GetMutableFuncMgr(),
layout_transformer::TransformLayoutForEP);
[&execution_providers](Graph& graph, bool& modified,
const IExecutionProvider& execution_provider,
const layout_transformer::DebugGraphFn& debug_graph_fn) -> Status {
return layout_transformer::TransformLayoutForEP(graph, modified, execution_provider, execution_providers.GetDefaultCpuAllocator(), debug_graph_fn);
});
ASSERT_TRUE(status.IsOK()) << status;
ASSERT_STATUS_OK(session_state.FinalizeSessionState(oss.str(), krm));

Expand Down Expand Up @@ -282,7 +290,11 @@ TEST(SessionStateTest, TestInitializerMemoryAllocatedUsingNonArenaMemory) {
// Partition the graph
GraphPartitioner partitioner(krm, execution_providers);
status = partitioner.Partition(graph, session_state.GetMutableFuncMgr(),
layout_transformer::TransformLayoutForEP);
[&execution_providers](Graph& graph, bool& modified,
const IExecutionProvider& execution_provider,
const layout_transformer::DebugGraphFn& debug_graph_fn) -> Status {
return layout_transformer::TransformLayoutForEP(graph, modified, execution_provider, execution_providers.GetDefaultCpuAllocator(), debug_graph_fn);
});
ASSERT_TRUE(status.IsOK()) << status;

// Finalize the session state
Expand Down

0 comments on commit dc06c25

Please sign in to comment.