-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[TEST] Unit tests on bf16 transformer
Signed-off-by: Alexander Peskov <[email protected]>
- Loading branch information
1 parent
d564708
commit 75051e8
Showing
2 changed files
with
174 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
172 changes: 172 additions & 0 deletions
172
inference-engine/tests/unit/cpu/bf16_transformer_test.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
// Copyright (C) 2018-2020 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include <memory> | ||
#include <gtest/gtest.h> | ||
|
||
#include <ngraph/ngraph.hpp> | ||
#include <ngraph_ops/fully_connected.hpp> | ||
|
||
#include <inference_engine.hpp> | ||
#include <details/ie_cnn_network_tools.h> | ||
#include <convert_function_to_cnn_network.hpp> | ||
#include <bf16transformer.h> | ||
|
||
using ngraph::Shape; | ||
using ngraph::element::Type; | ||
using namespace ngraph::op; | ||
using std::make_shared; | ||
using InferenceEngine::Precision; | ||
|
||
/**
 * Builds a name -> layer lookup table for the given network.
 *
 * The layers are obtained from CNNNetSortTopologically and stored under their
 * `name` field. If two layers share a name, the one that comes later in the
 * sorted sequence overwrites the earlier entry.
 *
 * @param net network whose layers are collected
 * @return map from layer name to the corresponding layer pointer
 */
std::map<std::string, InferenceEngine::CNNLayerPtr> get_layer_collection(InferenceEngine::CNNNetwork net) {
    std::map<std::string, InferenceEngine::CNNLayerPtr> by_name;

    IE_SUPPRESS_DEPRECATED_START
    const auto sorted_layers = InferenceEngine::details::CNNNetSortTopologically(net);
    IE_SUPPRESS_DEPRECATED_END

    for (auto it = sorted_layers.begin(); it != sorted_layers.end(); ++it) {
        by_name[(*it)->name] = *it;
    }
    return by_name;
}
|
||
enum TypeOfNet { NG, IE }; | ||
InferenceEngine::CNNNetwork create_net(std::shared_ptr<ngraph::Function> &func, TypeOfNet type) { | ||
InferenceEngine::CNNNetwork ng_net(func); | ||
if (type == NG) | ||
return ng_net; | ||
else | ||
return InferenceEngine::CNNNetwork {InferenceEngine::details::convertFunctionToICNNNetwork(func, ng_net)}; | ||
} | ||
|
||
|
||
TEST(BF16TransformerTest, KeepMemoryPrecision) {
    /*
     * Tested pattern
     *      _______     _____
     *     [_mem_r_]   [_inp_]
     *       _|______|_
     *      [___mul____]
     *         __|__
     *        [_sig_]
     *         __|__
     *        [_fc1_]
     *        ___|____
     *     ___|___   __|__
     *    [_mem_w_] [_fc2_]
     *               __|__
     *              [_out_]
     *
     * If the transformation does not take care of memory precision, mem_w
     * gets the precision of the data between the fc1 and fc2 operations,
     * which is BF16 when BF16 is enabled, whereas mem_r still keeps the
     * original precision.
     *
     * NOTE(review): the paragraph above says mem_r keeps its original
     * precision, but the assertions at the end of this test expect BF16 for
     * both mem_r and mem_w. Confirm which of the two is intended.
     */
    Shape shape = {3, 2};
    Type type = ngraph::element::f32;

    // Memory read ("id") initialised with zeros, multiplied by the network input.
    auto input = make_shared<Parameter>(type, shape);
    auto mem_i = make_shared<Constant>(type, shape, 0);
    auto mem_r = make_shared<ReadValue>(mem_i, "id");
    mem_r->set_friendly_name("mem_r");

    auto mul = make_shared<Multiply>(mem_r, input);
    auto sig = make_shared<Sigmoid>(mul);

    auto fc1_w = make_shared<Constant>(type, Shape{2, 2}, 1);
    auto fc1_b = make_shared<Constant>(type, Shape{2}, 1);
    auto fc1 = make_shared<FullyConnected>(sig, fc1_w, fc1_b, shape);

    auto fc2_w = make_shared<Constant>(type, Shape{2, 2}, 1);
    auto fc2_b = make_shared<Constant>(type, Shape{2}, 1);
    auto fc2 = make_shared<FullyConnected>(fc1, fc2_w, fc2_b, shape);

    // Memory write ("id") consumes the output of fc1.
    auto mem_w = make_shared<Assign>(fc1, "id");
    mem_w->set_friendly_name("mem_w");

    // WA. Limitation of ngraph: control dependencies are required.
    mem_w->add_control_dependency(mem_r);
    fc2->add_control_dependency(mem_w);

    auto function = std::make_shared<ngraph::Function>(
            ngraph::NodeVector {fc2},
            ngraph::ParameterVector {input},
            "SimpleNet");

    auto net = create_net(function, IE);

    // Apply tested BF16 transformation
    MKLDNNPlugin::BF16Transformer transformer;
    transformer.convertToBFloat16(net);

    // Look the memory layers up without operator[]: operator[] would insert a
    // null pointer for a missing name and the test would crash on dereference
    // instead of reporting a failure.
    const auto layers = get_layer_collection(net);

    const auto mem_r_it = layers.find("mem_r");
    ASSERT_TRUE(mem_r_it != layers.end()) << "Layer 'mem_r' is missing in the converted network";
    ASSERT_TRUE(mem_r_it->second != nullptr) << "Layer 'mem_r' is null";
    ASSERT_FALSE(mem_r_it->second->outData.empty()) << "Layer 'mem_r' has no output data";
    const auto mem_r_out = mem_r_it->second->outData[0];
    ASSERT_TRUE(mem_r_out != nullptr) << "Output data of 'mem_r' is null";

    const auto mem_w_it = layers.find("mem_w");
    ASSERT_TRUE(mem_w_it != layers.end()) << "Layer 'mem_w' is missing in the converted network";
    ASSERT_TRUE(mem_w_it->second != nullptr) << "Layer 'mem_w' is null";
    ASSERT_FALSE(mem_w_it->second->insData.empty()) << "Layer 'mem_w' has no input data";
    const auto mem_w_in = mem_w_it->second->insData[0].lock();
    ASSERT_TRUE(mem_w_in != nullptr) << "Input data of 'mem_w' has expired";

    // Check precision
    Precision prc_mem_r = mem_r_out->getPrecision();
    Precision prc_mem_w = mem_w_in->getPrecision();

    ASSERT_EQ(prc_mem_r, Precision::BF16);
    ASSERT_EQ(prc_mem_w, Precision::BF16);
}
|
||
TEST(BF16TransformerTest, DISABLED_KeepMemoryPrecisionWithGEMM) {
    /*
     * Tested pattern
     *      _______     _____
     *     [_mem_r_]   [_inp_]
     *       _|______|_
     *      [___mul____]
     *         __|__
     *        [_sig_]
     *         __|____
     *        [_gemm1_]
     *        ___|____
     *     ___|___   __|____
     *    [_mem_w_] [_gemm2_]
     *               __|__
     *              [_out_]
     *
     * If the transformation does not take care of memory precision, mem_w
     * gets the precision of the data between the gemm1 and gemm2 operations,
     * which is BF16 when BF16 is enabled, whereas mem_r still keeps the
     * original precision.
     *
     * NOTE(review): the paragraph above says mem_r keeps its original
     * precision, but the assertions at the end of this test expect BF16 for
     * both mem_r and mem_w. Confirm which of the two is intended.
     */
    Shape shape = {3, 2};
    Type type = ngraph::element::f32;

    // Memory read ("id") initialised with zeros, multiplied by the network input.
    auto input = make_shared<Parameter>(type, shape);
    auto mem_i = make_shared<Constant>(type, shape, 0);
    auto mem_r = make_shared<ReadValue>(mem_i, "id");
    mem_r->set_friendly_name("mem_r");

    auto mul = make_shared<Multiply>(mem_r, input);
    auto sig = make_shared<Sigmoid>(mul);

    auto gemm1_w = make_shared<Constant>(type, Shape{2, 2}, 1);
    auto gemm1 = make_shared<MatMul>(sig, gemm1_w);

    auto gemm2_w = make_shared<Constant>(type, Shape{2, 2}, 1);
    auto gemm2 = make_shared<MatMul>(gemm1, gemm2_w);

    // Memory write ("id") consumes the output of gemm1.
    auto mem_w = make_shared<Assign>(gemm1, "id");
    mem_w->set_friendly_name("mem_w");

    // WA. Limitation of ngraph: control dependencies are required.
    mem_w->add_control_dependency(mem_r);
    gemm2->add_control_dependency(mem_w);

    auto function = std::make_shared<ngraph::Function>(
            ngraph::NodeVector {gemm2},
            ngraph::ParameterVector {input},
            "SimpleNet");

    auto net = create_net(function, IE);

    // Apply tested BF16 transformation
    MKLDNNPlugin::BF16Transformer transformer;
    transformer.convertToBFloat16(net);

    // Look the memory layers up without operator[]: operator[] would insert a
    // null pointer for a missing name and the test would crash on dereference
    // instead of reporting a failure.
    const auto layers = get_layer_collection(net);

    const auto mem_r_it = layers.find("mem_r");
    ASSERT_TRUE(mem_r_it != layers.end()) << "Layer 'mem_r' is missing in the converted network";
    ASSERT_TRUE(mem_r_it->second != nullptr) << "Layer 'mem_r' is null";
    ASSERT_FALSE(mem_r_it->second->outData.empty()) << "Layer 'mem_r' has no output data";
    const auto mem_r_out = mem_r_it->second->outData[0];
    ASSERT_TRUE(mem_r_out != nullptr) << "Output data of 'mem_r' is null";

    const auto mem_w_it = layers.find("mem_w");
    ASSERT_TRUE(mem_w_it != layers.end()) << "Layer 'mem_w' is missing in the converted network";
    ASSERT_TRUE(mem_w_it->second != nullptr) << "Layer 'mem_w' is null";
    ASSERT_FALSE(mem_w_it->second->insData.empty()) << "Layer 'mem_w' has no input data";
    const auto mem_w_in = mem_w_it->second->insData[0].lock();
    ASSERT_TRUE(mem_w_in != nullptr) << "Input data of 'mem_w' has expired";

    // Check precision
    Precision prc_mem_r = mem_r_out->getPrecision();
    Precision prc_mem_w = mem_w_in->getPrecision();

    ASSERT_EQ(prc_mem_r, Precision::BF16);
    ASSERT_EQ(prc_mem_w, Precision::BF16);
}