[IPU] add more loss ops (#44646)
* add more loss ops

* add authors

Co-authored-by: Zhaorui Chen <[email protected]>

gglin001 and czr-gc authored Jul 27, 2022
1 parent 8a07d02 commit 8bf7cd8
Showing 5 changed files with 519 additions and 6 deletions.
212 changes: 212 additions & 0 deletions paddle/fluid/platform/device/ipu/popart_canonicalization/loss_ops.cc
@@ -278,6 +278,123 @@ Node *kldiv_loss_handler(Graph *graph, Node *node) {
return loss;
}

Node *sigmoid_cross_entropy_with_logits_handler(Graph *graph, Node *node) {
// Out = max(logits, 0) - logits * label + log(1 + exp(-abs(logits)))
auto *op = node->Op();
int reduction = 2;
if (is_dynamic_graph()) {
reduction = RemoveTailReduction(graph, node, "Out");
}
bool append_identity_loss =
is_dynamic_graph() && IsLastVarNode(GetOutputVarNode("Out", node));

auto logits = GetInputVarNode("X", node);
auto label = GetInputVarNode("Label", node);
// sigmoid_cross_entropy_with_logits uses float label as input.
auto ignore_index_value =
static_cast<float>(PADDLE_GET_CONST(int, op->GetAttr("ignore_index")));
auto normalize = PADDLE_GET_CONST(bool, op->GetAttr("normalize"));

// const
auto one = CreateConst(
graph, node, std::vector<float>{1.0}, {1}, GetVarDType(logits))
->outputs.front();
auto zero =
CreateConst(
graph, node, std::vector<float>{0.0}, {1}, GetVarDType(logits))
->outputs.front();
auto ignore_index = CreateConst(graph,
node,
std::vector<float>{ignore_index_value},
{1},
GetVarDType(label))
->outputs.front();
// max(logits, 0)
auto max_zero =
CreateBaseOp(graph, node, "popart_max", {logits, zero}, {}, {})
->outputs.front();

// logits * label
auto mul = CreateBaseOp(graph, node, "popart_mul", {logits, label}, {}, {})
->outputs.front();

// abs(logits)
auto abs = CreateBaseOp(graph, node, "popart_abs", {logits}, {}, {})
->outputs.front();
// -abs(logits)
auto neg_abs =
CreateBaseOp(graph, node, "popart_neg", {abs}, {}, {})->outputs.front();
// exp(-abs(logits))
auto exp_neg_abs = CreateBaseOp(graph, node, "popart_exp", {neg_abs}, {}, {})
->outputs.front();
// 1+exp(-abs(logits))
auto log_term =
CreateBaseOp(graph, node, "popart_add", {exp_neg_abs, one}, {}, {})
->outputs.front();
// log(1+exp(-abs(logits)))
auto log = CreateBaseOp(graph, node, "popart_log", {log_term}, {}, {})
->outputs.front();

// max(logits, 0) - logits * label
auto sub = CreateBaseOp(graph, node, "popart_sub", {max_zero, mul}, {}, {})
->outputs.front();
// max(logits, 0) - logits * label + log(1 + exp(-abs(logits)))
auto loss = CreateBaseOp(graph, node, "popart_add", {sub, log}, {}, {})
->outputs.front();

// label == ignore_index ? 0 : loss
auto equal_cond =
CreateBaseOp(graph, node, "popart_equal", {label, ignore_index}, {}, {})
->outputs.front();
loss = CreateBaseOp(graph,
node,
"popart_where",
{equal_cond, zero, loss},
append_identity_loss || normalize
? std::vector<Node *>{}
: std::vector<Node *>{GetOutputVarNode("Out", node)},
{});

if (normalize) {
// normalize the output as: loss = loss / sum(label != ignore_index)
auto not_equal =
CreateBaseOp(graph, node, "popart_logical_not", {equal_cond}, {}, {})
->outputs.front();
auto mask =
CreateCast(graph, node, {not_equal}, {}, logits->Var()->GetDataType())
->outputs.front();
auto sum = CreateBaseOp(graph,
node,
"popart_reducesum",
{mask},
{},
{{"keepdims", int64_t{0}}})
->outputs.front();
auto eps =
CreateConst(
graph, node, std::vector<float>{1e-5}, {1}, GetVarDType(logits))
->outputs.front();
// avoid division by zero
auto add_eps = CreateBaseOp(graph, node, "popart_add", {sum, eps}, {}, {})
->outputs.front();
loss =
CreateBaseOp(graph,
node,
"popart_div",
{loss->outputs[0], add_eps},
append_identity_loss
? std::vector<Node *>{}
: std::vector<Node *>{GetOutputVarNode("Out", node)},
{});
}

if (append_identity_loss) {
loss = CreateIdentityLossOp(
graph, node, loss->outputs, {GetOutputVarNode("Out", node)}, reduction);
}
return loss;
}

Node *binary_cross_entropy_handler(Graph *graph, Node *node) {
// Out = -1 * weight * (label * log(x) + (1 - label) * log(1 - x))
int reduction = 2;
@@ -493,6 +610,97 @@ Node *warpctc_handler(Graph *graph, Node *node) {
return loss;
}

Node *rank_loss_handler(Graph *graph, Node *node) {
// (1.0f + (left - right).exp()).log() - label * (left - right)
auto label = GetInputVarNode("Label", node);
auto left = GetInputVarNode("Left", node);
auto right = GetInputVarNode("Right", node);
auto output = GetOutputVarNode("Out", node);
int reduction = 2;
if (is_dynamic_graph()) {
reduction = RemoveTailReduction(graph, node, "Out");
}
bool append_identity_loss = is_dynamic_graph() && IsLastVarNode(output);

auto sub = CreateBaseOp(graph, node, "popart_sub", {left, right}, {}, {})
->outputs.front();
auto mul = CreateBaseOp(graph, node, "popart_mul", {label, sub}, {}, {})
->outputs.front();
// const
auto one =
CreateConst(graph, node, std::vector<float>{1.0}, {1}, GetVarDType(label))
->outputs.front();
auto exp =
CreateBaseOp(graph, node, "popart_exp", {sub}, {}, {})->outputs.front();
auto add = CreateBaseOp(graph, node, "popart_add", {one, exp}, {}, {})
->outputs.front();
auto log =
CreateBaseOp(graph, node, "popart_log", {add}, {}, {})->outputs.front();
auto loss = CreateBaseOp(graph,
node,
"popart_sub",
{log, mul},
append_identity_loss ? std::vector<Node *>{}
: std::vector<Node *>{output},
{})
->outputs.front();
if (append_identity_loss) {
loss =
CreateIdentityLossOp(graph, node, loss->outputs, {output}, reduction);
}
return loss;
}

Node *margin_rank_loss_handler(Graph *graph, Node *node) {
// rank_loss = max(0, -label * (left - right) + margin)
auto *op = node->Op();
auto label = GetInputVarNode("Label", node);
auto left = GetInputVarNode("X1", node);
auto right = GetInputVarNode("X2", node);
auto output = GetOutputVarNode("Out", node);
auto margin_value = PADDLE_GET_CONST(float, op->GetAttr("margin"));
int reduction = 2;
if (is_dynamic_graph()) {
reduction = RemoveTailReduction(graph, node, "Out");
}
bool append_identity_loss = is_dynamic_graph() && IsLastVarNode(output);

// -(left - right)
auto sub = CreateBaseOp(graph, node, "popart_sub", {right, left}, {}, {})
->outputs.front();
// -label * (left - right)
auto mul = CreateBaseOp(graph, node, "popart_mul", {label, sub}, {}, {})
->outputs.front();
// const
auto zero =
CreateConst(graph, node, std::vector<float>{0.0}, {1}, GetVarDType(label))
->outputs.front();
auto margin = CreateConst(graph,
node,
std::vector<float>{margin_value},
{1},
GetVarDType(label))
->outputs.front();
auto margin_add =
CreateBaseOp(graph, node, "popart_add", {mul, margin}, {}, {})
->outputs.front();

// max(0, term)
auto loss = CreateBaseOp(graph,
node,
"popart_max",
{zero, margin_add},
append_identity_loss ? std::vector<Node *>{}
: std::vector<Node *>{output},
{})
->outputs.front();
if (append_identity_loss) {
loss =
CreateIdentityLossOp(graph, node, loss->outputs, {output}, reduction);
}
return loss;
}

} // namespace
} // namespace ipu
} // namespace platform
@@ -502,7 +710,11 @@ REGISTER_HANDLER(identity_loss, identity_loss_handler);
REGISTER_HANDLER(softmax_with_cross_entropy,
softmax_with_cross_entropy_handler);
REGISTER_HANDLER(cross_entropy2, cross_entropy2_handler);
REGISTER_HANDLER(sigmoid_cross_entropy_with_logits,
sigmoid_cross_entropy_with_logits_handler);
REGISTER_HANDLER(kldiv_loss, kldiv_loss_handler);
REGISTER_HANDLER(bce_loss, binary_cross_entropy_handler);
REGISTER_HANDLER(huber_loss, huber_loss_handler);
REGISTER_HANDLER(warpctc, warpctc_handler);
REGISTER_HANDLER(rank_loss, rank_loss_handler);
REGISTER_HANDLER(margin_rank_loss, margin_rank_loss_handler);
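
Note: as a reference for the formula in sigmoid_cross_entropy_with_logits_handler above (Out = max(x, 0) - x * label + log(1 + exp(-|x|)), with ignore_index masking and optional normalization), a minimal NumPy sketch follows. The function name and toy inputs are illustrative only and not part of this diff; the handler composes popart_add and popart_log, while the sketch uses log1p for brevity.

import numpy as np

def sigmoid_ce_with_logits_ref(logits, label, ignore_index, normalize=False):
    # Elementwise: max(x, 0) - x * label + log(1 + exp(-|x|))
    loss = np.maximum(logits, 0.0) - logits * label + np.log1p(np.exp(-np.abs(logits)))
    # label == ignore_index ? 0 : loss
    ignored = label == float(ignore_index)
    loss = np.where(ignored, 0.0, loss)
    if normalize:
        # divide by the number of non-ignored labels, plus the handler's 1e-5 epsilon
        loss = loss / ((~ignored).sum() + 1e-5)
    return loss

# toy check (values chosen for illustration)
logits = np.array([[2.0, -1.0], [0.5, 3.0]], dtype=np.float32)
label = np.array([[1.0, 0.0], [-1.0, 1.0]], dtype=np.float32)
print(sigmoid_ce_with_logits_ref(logits, label, ignore_index=-1, normalize=True))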
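
Likewise, minimal NumPy sketches of the formulas lowered by rank_loss_handler and margin_rank_loss_handler (log(1 + exp(left - right)) - label * (left - right), and max(0, -label * (left - right) + margin)); the names and sample values below are illustrative, not part of the diff.

import numpy as np

def rank_loss_ref(label, left, right):
    # (1 + exp(left - right)).log() - label * (left - right)
    diff = left - right
    return np.log1p(np.exp(diff)) - label * diff

def margin_rank_loss_ref(label, left, right, margin=0.1):
    # max(0, -label * (left - right) + margin)
    return np.maximum(0.0, -label * (left - right) + margin)

label = np.array([[1.0], [-1.0], [1.0]], dtype=np.float32)
left = np.array([[0.2], [0.7], [0.4]], dtype=np.float32)
right = np.array([[0.5], [0.1], [0.3]], dtype=np.float32)
print(rank_loss_ref(label, left, right))
print(margin_rank_loss_ref(label, left, right, margin=0.5))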
45 changes: 39 additions & 6 deletions python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py
@@ -70,8 +70,8 @@ def set_op_attrs(self):
self.loss_op = paddle.fluid.layers.cross_entropy

def set_data_feed(self):
self.data = paddle.uniform((32, 3, 10, 10), dtype='float32')
self.label = paddle.randint(0, 10, shape=[32], dtype='int64')
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
self.label = paddle.randint(0, 10, shape=[8], dtype='int64')

def create_model(self, use_ipu=False):
return SimpleLayer(loss_op=self.loss_op,
@@ -215,8 +215,8 @@ def set_op_attrs(self):
self.loss_op = paddle.fluid.layers.softmax_with_cross_entropy

def set_data_feed(self):
self.data = paddle.uniform((32, 3, 10, 10), dtype='float32')
self.label = paddle.randint(0, 10, shape=[32, 1], dtype='int64')
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
self.label = paddle.randint(0, 10, shape=[8, 1], dtype='int64')

def create_model(self, use_ipu=False):
return SimpleLayer(loss_op=self.loss_op,
@@ -231,8 +231,41 @@ def set_op_attrs(self):
self.loss_op = partial(paddle.fluid.layers.kldiv_loss, reduction="none")

def set_data_feed(self):
self.data = paddle.uniform((32, 3, 10, 10), dtype='float32')
self.label = paddle.rand(shape=[32, 81], dtype='float32')
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
self.label = paddle.rand(shape=[8, 81], dtype='float32')

def create_model(self, use_ipu=False):
return SimpleLayer(loss_op=self.loss_op,
use_softmax=True,
use_reduction=True,
use_identity_loss=False)


class TestWithoutIdentityLoss4(TestBase):

def set_op_attrs(self):
self.loss_op = paddle.nn.functional.binary_cross_entropy

def set_data_feed(self):
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
self.label = paddle.rand(shape=[8, 81], dtype='float32')

def create_model(self, use_ipu=False):
return SimpleLayer(loss_op=self.loss_op,
use_softmax=True,
use_reduction=False,
use_identity_loss=False)


class TestWithoutIdentityLoss5(TestBase):

def set_op_attrs(self):
self.loss_op = paddle.fluid.layers.sigmoid_cross_entropy_with_logits

def set_data_feed(self):
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
self.label = paddle.randint(0, 10, shape=[8, 81],
dtype='int64').astype('float32')

def create_model(self, use_ipu=False):
return SimpleLayer(loss_op=self.loss_op,
@@ -0,0 +1,90 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import paddle
import paddle.static
from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest


class TestBase(IPUOpTest):

    def setUp(self):
        self.set_atol()
        self.set_training()
        self.set_data_feed()
        self.set_feed_attr()
        self.set_op_attrs()

    def set_data_feed(self):
        label = np.random.uniform(size=[3, 1])
        left = np.random.uniform(size=[3, 1])
        right = np.random.uniform(size=[3, 1])
        self.feed_fp32 = {
            "label": label.astype(np.float32),
            "left": left.astype(np.float32),
            "right": right.astype(np.float32),
        }
        self.feed_fp16 = {
            "label": label.astype(np.float16),
            "left": left.astype(np.float16),
            "right": right.astype(np.float16),
        }

    def set_feed_attr(self):
        self.feed_shape = [x.shape for x in self.feed_fp32.values()]
        self.feed_list = list(self.feed_fp32.keys())

    def set_op_attrs(self):
        self.attrs = {
            'margin': 0.1,
        }

    @IPUOpTest.static_graph
    def build_model(self, on_ipu):
        label = paddle.static.data(name=self.feed_list[0],
                                   shape=self.feed_shape[0],
                                   dtype="float32")
        left = paddle.static.data(name=self.feed_list[1],
                                  shape=self.feed_shape[1],
                                  dtype='float32')
        right = paddle.static.data(name=self.feed_list[2],
                                   shape=self.feed_shape[2],
                                   dtype='float32')
        out = paddle.fluid.layers.margin_rank_loss(label, left, right,
                                                   **self.attrs)
        self.fetch_list = [out.name]

    def run_model(self, exec_mode):
        self.run_op_test(exec_mode)

    def test(self):
        for m in IPUOpTest.ExecutionMode:
            if not self.skip_mode(m):
                self.build_model(self.is_ipu_mode(m))
                self.run_model(m)
        self.check()


class TestCase1(TestBase):

    def set_op_attrs(self):
        self.attrs = {
            'margin': 0.5,
        }


if __name__ == "__main__":
    unittest.main()
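
As a rough cross-check outside the IPUOpTest harness (illustrative only, not part of this diff), the margin_rank_loss values for a small hand-picked feed can be computed directly from the formula used by margin_rank_loss_handler in loss_ops.cc, for both the TestBase (margin=0.1) and TestCase1 (margin=0.5) attributes:

import numpy as np

# Toy values (not the random feed the test generates).
label = np.array([[1.0], [1.0], [-1.0]], dtype=np.float32)
left = np.array([[0.8], [0.2], [0.6]], dtype=np.float32)
right = np.array([[0.3], [0.5], [0.1]], dtype=np.float32)

for margin in (0.1, 0.5):  # TestBase and TestCase1 attrs
    out = np.maximum(0.0, -label * (left - right) + margin)
    print(margin, out.ravel())
# margin=0.1 -> [0.0, 0.4, 0.6]; margin=0.5 -> [0.0, 0.8, 1.0]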