Skip to content

Commit

Permalink
mergeTransposeReshapeReorder moved to a common method
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 7, 2023
1 parent f016d8c commit c240f88
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 195 deletions.
307 changes: 113 additions & 194 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2449,6 +2449,113 @@ bool GraphOptimizer::checkAscendingSummaryOrder(const VectorDims& transposeOrder
return true;
}

// Replaces a Transpose -> Reshape(optional) -> Reorder chain (or, when reverseOrder is true,
// a Reorder -> Reshape(optional) -> Transpose chain) with a single Reorder inserted on the edge
// that connects the node before the chain to the node after it.
//
// graph         - graph that owns the nodes and edges being rewritten
// transposeNode - Transpose node of the chain; its input edge at port 1 is removed
// reshapeNode   - optional Reshape node between the two (may be null); its input edge at
//                 port 1 is removed as well
// reorderNode   - Reorder node of the chain
// reverseOrder  - false: the chain starts with Transpose; true: the chain starts with Reorder
//
// If the input and output precisions differ, a second Reorder performing the precision
// conversion is inserted right after the new one.
//
// Throws (IE_THROW) if, after dropping the chain, no edge connects the node before the chain
// to the node after it, or (direct order only) if transposeNode is not a Transpose.
void GraphOptimizer::mergeTransposeReshapeReorder(Graph& graph,
                                                  const NodePtr& transposeNode,
                                                  const NodePtr& reshapeNode,
                                                  const NodePtr& reorderNode,
                                                  const bool reverseOrder) {
    // First and last node of the chain, depending on the direction.
    const auto& parentNode = reverseOrder ? reorderNode : transposeNode;
    const auto& childNode = reverseOrder ? transposeNode : reorderNode;
    auto nodeBeforeSequence = parentNode->getParentEdgesAtPort(0)[0]->getParent();
    auto nodeAfterSequence = childNode->getChildEdgeAt(0)->getChild();

    // Drops the edge feeding input port `idx` of `node`, erases it from the graph edge list and
    // removes the edge's source node if it is left without consumers.
    auto removeInputEdge = [&](const NodePtr& node, const size_t idx) {
        auto remEdge = node->getParentEdgesAtPort(idx)[0];
        // Named differently from the outer `parentNode` to avoid shadowing the captured variable.
        auto edgeSource = remEdge->getParent();
        remEdge->drop();
        auto& edges = graph.GetEdges();
        const auto it = std::find(edges.begin(), edges.end(), remEdge);
        if (it != edges.end()) {
            edges.erase(it);
            if (edgeSource->getChildEdges().empty())
                edgeSource->remove();
        }
    };

    removeInputEdge(transposeNode, 1);
    if (reshapeNode)
        removeInputEdge(reshapeNode, 1);

    // to prevent inPlace conflict we must check that the memory reference is unidirectional or
    // inPlace memory is not used
    const auto parentInPlace = parentNode->getParentEdgeAt(0)->inPlace(Edge::LOOK_UP);
    const auto& childEdges = childNode->getChildEdgesAtPort(0);
    const auto childInPlace = std::any_of(childEdges.begin(), childEdges.end(), [](const EdgePtr& edge) {
        return edge->inPlace(Edge::LOOK_DOWN);
    });
    bool isOptimized = !(parentInPlace && childInPlace);

    graph.DropNode(transposeNode);
    graph.DropNode(reorderNode);
    if (reshapeNode)
        graph.DropNode(reshapeNode);

    // The dropped nodes are still referenced by the caller's pointers, so their selected
    // descriptors are read here to describe the input and output of the whole chain.
    auto inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc();
    auto outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].getMemDesc();

    auto inPrec = inDesc->getPrecision();
    auto outPrec = outDesc->getPrecision();

    // The first Reorder keeps the input precision; conversion (if any) is done by a second one.
    auto reorderInDesc = inDesc;
    auto reorderOutDesc = outDesc->cloneWithNewPrecision(inPrec);

    // Find the edge that now connects the node before the chain directly to the node after it.
    EdgePtr edge;
    for (const auto& weakEdge : nodeBeforeSequence->getChildEdges()) {
        // Lock once and skip expired edges instead of dereferencing a null pointer.
        const auto candidate = weakEdge.lock();
        if (candidate && candidate->getChild() == nodeAfterSequence) {
            edge = candidate;
            break;
        }
    }
    if (!edge) {
        IE_THROW() << "Parent node '" << parentNode->getName() << "' has invalid edges.";
    }

    std::vector<int> srcPerm;
    if (!reverseOrder) {
        // case 1. transposeNode support blocked input & non-blocked output, in the case, the reorder
        // cannot be optimized
        // case 2. Transpose and Reorder do opposite permutation to each other as expected, but isOptimized is already
        // set false due to some preliminarily checks. We need to reinterpret layout Transpose input without physical
        // change of the memory.
        auto* castedTranspose = dynamic_cast<Transpose*>(transposeNode.get());
        if (castedTranspose == nullptr) {
            IE_THROW() << "[CPU] parent node of type:" << transposeNode->getTypeStr()
                       << " with name: " << transposeNode->getName() << " is not a transpose node";
        }

        auto inOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc()->as<BlockedMemoryDesc>()->getOrder();
        auto outOrder = reorderOutDesc->as<BlockedMemoryDesc>()->getOrder();
        if (!isOptimized || inOrder.size() > outOrder.size()) {
            isOptimized = false;
            // inDesc should be permuted before calling reorder: srcPerm is the inverse of the
            // Transpose order.
            auto& ord = castedTranspose->getOrder();
            srcPerm = std::vector<int>(ord.size());
            for (size_t i = 0; i < ord.size(); i++) {
                srcPerm[ord[i]] = static_cast<int>(i);
            }
        }
    }

    std::string reorderlayerName = nodeBeforeSequence->getName() + "_" + Reorder::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_fake";
    DEBUG_LOG("mergeTransposeAndReorder ", parentNode->getName(), " and ", childNode->getName(), " -> ", reorderlayerName);
    auto newReorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, isOptimized, srcPerm);

    // If precisions don't match, another reorder must be inserted to perform conversion
    if (inPrec != outPrec) {
        auto reorderInDesc2 = reorderOutDesc;
        auto reorderOutDesc2 = outDesc;

        std::string reorderLayerName2 = newReorderNode->getName() + "_" +
                                        Reorder::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" +
                                        nodeAfterSequence->getName();

        graph.InsertReorder(newReorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, false);
    }
}

void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) {
auto& graphNodes = graph.GetNodes();

Expand Down Expand Up @@ -2486,6 +2593,8 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) {

size_t mismatchCount = 0;
for (size_t i = 0; i < inShape.size(); ++i) {
if (i + mismatchCount >= outShape.size())
return false;
if (inShape[i] != outShape[i + mismatchCount]) {
mismatchCount++;
}
Expand Down Expand Up @@ -2540,119 +2649,6 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) {
return transformedOrder;
};

// Detaches the edge that feeds input port `idx` of `node`: the edge is dropped, erased from the
// graph's edge list, and its source node is removed if it has no child edges left.
auto removeInputEdge = [&](const NodePtr& node, const size_t idx) {
    const auto edgeToRemove = node->getParentEdgesAtPort(idx)[0];
    const auto producer = edgeToRemove->getParent();
    edgeToRemove->drop();

    auto& graphEdges = graph.GetEdges();
    auto pos = graphEdges.begin();
    while (pos != graphEdges.end() && !((*pos) == edgeToRemove)) {
        pos++;
    }
    // Nothing to erase if the edge is not registered in the graph.
    if (pos == graphEdges.end()) {
        return;
    }

    graphEdges.erase(pos);
    if (producer->getChildEdges().empty()) {
        producer->remove();
    }
};

// Transpose and Reorder do opposite permutation to each other.
// Example:
// chain [physical layout: NCHW, logical layout: NCHW] -> Transpose(order=0312) -> [physical layout: NWCH, logical layout: NCHW] ->
// Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true)
// which just reinterprets the layout without any physical change of the memory.
// Two cases are possible:
// 1) inPrec = outPrec
// In this case, we replace Transpose+Reorder pattern with a new Reorder that does nothing.
// 2) inPrec != outPrec
// As in the first case, we also replace Transpose+Reorder pattern with a new Reorder.
// Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec)
// to the output precision (outPrec)
// Replaces Transpose -> Reshape(optional) -> Reorder with a single Reorder inserted between the
// Transpose's parent and the Reorder's child. When input and output precisions differ, a second
// Reorder performing the precision conversion is inserted after it.
// reshapeNode may be null. Throws (IE_THROW) if no edge connects the Transpose's parent to the
// Reorder's child after the chain is dropped, or if transposeNode is not a Transpose.
auto mergeTransposeAndReorder = [&](const NodePtr& transposeNode, const NodePtr& reshapeNode, const NodePtr& reorderNode) {
    auto transposeParentNode = transposeNode->getParentEdgesAtPort(0)[0]->getParent();
    auto reorderChildNode = reorderNode->getChildEdgeAt(0)->getChild();

    removeInputEdge(transposeNode, 1);
    if (reshapeNode)
        removeInputEdge(reshapeNode, 1);

    // to prevent inPlace conflict we must check that the memory reference is unidirectional or
    // inPlace memory is not used
    const auto parentInPlace = transposeNode->getParentEdgeAt(0)->inPlace(Edge::LOOK_UP);
    const auto& childEdges = reorderNode->getChildEdgesAtPort(0);
    const auto childInPlace = std::any_of(childEdges.begin(), childEdges.end(),
                                          [](const EdgePtr& edge){ return edge->inPlace(Edge::LOOK_DOWN); });
    bool isOptimized = !(parentInPlace && childInPlace);

    graph.DropNode(transposeNode);
    graph.DropNode(reorderNode);
    if (reshapeNode)
        graph.DropNode(reshapeNode);

    auto inDesc = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc();
    auto outDesc = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].getMemDesc();

    auto inPrec = inDesc->getPrecision();
    auto outPrec = outDesc->getPrecision();

    // The first Reorder keeps the input precision; conversion (if any) is done by a second one.
    auto reorderInDesc = inDesc;
    auto reorderOutDesc = outDesc->cloneWithNewPrecision(inPrec);

    std::string reorderlayerName = transposeParentNode->getName() + "_" +
                                   Reorder::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_fake";

    // Log the two nodes actually being merged; this lambda has no parentNode/childNode of its own.
    DEBUG_LOG("mergeTransposeAndReorder ", transposeNode->getName(), " and ", reorderNode->getName(), " -> ", reorderlayerName);

    // Find the edge that now connects the Transpose's parent directly to the Reorder's child.
    EdgePtr edge;
    for (const auto& weakEdge : transposeParentNode->getChildEdges()) {
        // Lock once and skip expired edges instead of dereferencing a null pointer.
        const auto candidate = weakEdge.lock();
        if (candidate && candidate->getChild() == reorderChildNode) {
            edge = candidate;
            break;
        }
    }
    if (!edge) {
        IE_THROW() << "Transpose node '" << transposeNode->getName() << "' has invalid edges.";
    }

    // case 1. transposeNode support blocked input & non-blocked output, in the case, the reorder
    // cannot be optimized
    // case 2. Transpose and Reorder do opposite permutation to each other as expected, but isOptimized is already set false
    // due to some preliminarily checks. We need to reinterpret layout Transpose input without physical change of the memory.
    std::vector<int> srcPerm;
    auto* castedTranspose = dynamic_cast<Transpose*>(transposeNode.get());
    if (castedTranspose == nullptr) {
        IE_THROW() << "[CPU] parent node of type:" << transposeNode->getTypeStr() << " with name: "
                   << transposeNode->getName() << " is not a transpose node";
    }
    auto inOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc()->as<BlockedMemoryDesc>()->getOrder();
    auto outOrder = reorderOutDesc->as<BlockedMemoryDesc>()->getOrder();
    if (!isOptimized || inOrder.size() > outOrder.size()) {
        isOptimized = false;
        // inDesc should be permuted before calling reorder: srcPerm is the inverse of the
        // Transpose order.
        auto& ord = castedTranspose->getOrder();
        srcPerm = std::vector<int>(ord.size());
        for (size_t i = 0; i < ord.size(); i++) {
            srcPerm[ord[i]] = static_cast<int>(i);
        }
    }

    auto newReorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, isOptimized, srcPerm);

    // Precisions differ: insert a second Reorder that performs the conversion.
    if (inPrec != outPrec) {
        auto reorderInDesc2 = reorderOutDesc;
        auto reorderOutDesc2 = outDesc;

        std::string reorderLayerName2 = newReorderNode->getName() + "_" +
                                        Reorder::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + reorderChildNode->getName();

        graph.InsertReorder(newReorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, false);
    }
};

for (size_t i = 0; i < graphNodes.size(); i++) {
auto parentNode = graphNodes[i];
if (!isSuitableTranspose(parentNode)) {
Expand Down Expand Up @@ -2694,7 +2690,7 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph& graph) {
auto& outOrder = outBlockedDesc->getOrder();

if (checkAscendingSummaryOrder(transposeOrder, layoutOrder, inOrder, outOrder)) {
mergeTransposeAndReorder(transposeNode, reshapeNode, reorderNode);
mergeTransposeReshapeReorder(graph, transposeNode, reshapeNode, reorderNode, false);
}
}
}
Expand All @@ -2719,6 +2715,8 @@ void GraphOptimizer::MergeReorderAndTranspose(Graph &graph) {

size_t mismatchCount = 0;
for (size_t i = 0; i < outShape.size(); ++i) {
if (i + mismatchCount >= inShape.size())
return false;
if (outShape[i] != inShape[i + mismatchCount]) {
mismatchCount++;
}
Expand Down Expand Up @@ -2769,85 +2767,6 @@ void GraphOptimizer::MergeReorderAndTranspose(Graph &graph) {
return transformedOrder;
};

// Detaches the edge that feeds input port `idx` of `node`: the edge is dropped, erased from the
// graph's edge list, and its source node is removed if it has no child edges left.
auto removeInputEdge = [&](const NodePtr& node, const size_t idx) {
    const auto edgeToRemove = node->getParentEdgesAtPort(idx)[0];
    const auto producer = edgeToRemove->getParent();
    edgeToRemove->drop();

    auto& graphEdges = graph.GetEdges();
    auto pos = graphEdges.begin();
    while (pos != graphEdges.end() && !((*pos) == edgeToRemove)) {
        pos++;
    }
    // Nothing to erase if the edge is not registered in the graph.
    if (pos == graphEdges.end()) {
        return;
    }

    graphEdges.erase(pos);
    if (producer->getChildEdges().empty()) {
        producer->remove();
    }
};

// Merge Reorder and Transpose which do opposite permutation to each other.
// Two cases are possible:
// 1) inPrec = outPrec
// In this case, we replace Reorder+Transpose pattern with a new Reorder that does nothing.
// 2) inPrec != outPrec
// As in the first case, we also replace Reorder+Transpose pattern with a new Reorder.
// Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec)
// to the output precision (outPrec)
// Replaces Reorder -> Reshape(optional) -> Transpose with a single Reorder inserted between the
// Reorder's parent and the Transpose's child; reshapeNode may be null.
// Throws (IE_THROW) if no edge connects those two nodes after the chain is dropped.
auto mergeTransposeAndReorder = [&](const NodePtr& transposeNode, const NodePtr& reshapeNode, const NodePtr& reorderNode) {
// Node feeding the chain (parent of Reorder) and node consuming it (child of Transpose).
auto reorderParentNode = reorderNode->getParentEdgesAtPort(0)[0]->getParent();
auto transposeChildNode = transposeNode->getChildEdgeAt(0)->getChild();

// Remove the edge at input port 1 of Transpose (and of Reshape, if present) before dropping
// the nodes. NOTE(review): port 1 is presumably the constant order/shape input - confirm.
removeInputEdge(transposeNode, 1);
if (reshapeNode)
removeInputEdge(reshapeNode, 1);

// to prevent inPlace conflict we must check that the memory reference is unidirectional or
// inPlace memory is not used
const auto parentInPlace = reorderNode->getParentEdgeAt(0)->inPlace(Edge::LOOK_UP);
const auto& childEdges = transposeNode->getChildEdgesAtPort(0);
const auto childInPlace = std::any_of(childEdges.begin(), childEdges.end(),
[](const EdgePtr& edge){ return edge->inPlace(Edge::LOOK_DOWN); });
bool isOptimized = !(parentInPlace && childInPlace);

// The in-place checks above read the chain's edges, so the nodes are dropped only afterwards.
graph.DropNode(reorderNode);
graph.DropNode(transposeNode);
if (reshapeNode)
graph.DropNode(reshapeNode);

// Input descriptor of the chain (Reorder input) and output descriptor (Transpose output).
auto inDesc = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc();
auto outDesc = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].getMemDesc();

auto inPrec = inDesc->getPrecision();
auto outPrec = outDesc->getPrecision();

// The first Reorder keeps the input precision; conversion (if any) is done by a second one.
auto reorderInDesc = inDesc;
auto reorderOutDesc = outDesc->cloneWithNewPrecision(inPrec);

std::string reorderlayerName = reorderParentNode->getName() + "_" +
Reorder::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_fake";

// Look for the edge that connects the Reorder's parent directly to the Transpose's child.
EdgePtr edge;
for (auto &childEdge : reorderParentNode->getChildEdges()) {
if (childEdge.lock()->getChild() == transposeChildNode) {
edge = childEdge.lock();
break;
}
}
if (!edge) {
IE_THROW() << "Transpose node '" << transposeNode->getName() << "' has invalid edges.";
}

auto newReorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, isOptimized);
// Precisions differ: insert a second Reorder that converts from inPrec to outPrec.
if (inPrec != outPrec) {
auto reorderInDesc2 = reorderOutDesc;
auto reorderOutDesc2 = outDesc;

std::string reorderLayerName2 = newReorderNode->getName() + "_" +
Reorder::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + transposeChildNode->getName();

graph.InsertReorder(newReorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, false);
}
};

for (size_t i = 0; i < graphNodes.size(); i++) {
auto parentNode = graphNodes[i];
if (!isSuitableReorder(parentNode)) {
Expand Down Expand Up @@ -2889,7 +2808,7 @@ void GraphOptimizer::MergeReorderAndTranspose(Graph &graph) {
auto& outOrder = outBlockedDesc->getOrder();

if (checkAscendingSummaryOrder(transposeOrder, layoutOrder, inOrder, outOrder)) {
mergeTransposeAndReorder(transposeNode, reshapeNode, reorderNode);
mergeTransposeReshapeReorder(graph, transposeNode, reshapeNode, reorderNode, true);
}
}
}
Expand Down
16 changes: 15 additions & 1 deletion src/plugins/intel_cpu/src/graph_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,25 @@ class GraphOptimizer {
void RemoveSameConvert(Graph &graph);

// Method checks that after the sequential execution of Transpose and Reorder nodes,
// the order of the elements in the memory will not change.
// the order of the elements in the memory (physical layout) will not change.
bool checkAscendingSummaryOrder(const VectorDims& transposeOrder,
const VectorDims& layoutOrder,
const VectorDims& reorderInOrder,
const VectorDims& reorderOutOrder);
// Method merges Transpose -> Reshape(optional) -> Reorder sequences which do opposite permutation to each other.
// Reverse order Reorder -> Reshape(optional) -> Transpose is supported too.
// Reshape support has the following limitations:
// - direct order: only a Reshape that splits one dimension into 2 consecutive dimensions is supported
// - reverse order: only a Reshape that fuses 2 consecutive dimensions into one is supported
// Example:
// chain [physical layout: NCHW, logical layout: NCHW] -> Transpose(order=0312) -> [physical layout: NWCH, logical layout: NCHW] ->
// Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true)
// which just reinterprets the layout without any physical change of the memory.
void mergeTransposeReshapeReorder(Graph& graph,
const NodePtr& transposeNode,
const NodePtr& reshapeNode,
const NodePtr& reorderNode,
const bool reverseOrder);
};

} // namespace intel_cpu
Expand Down

0 comments on commit c240f88

Please sign in to comment.