explicitly specify capture mode for lambdas (PaddlePaddle#55)
Feiyu Chan authored Sep 16, 2021
1 parent ffcf187 commit 6c3322c
Showing 41 changed files with 2,339 additions and 182 deletions.
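
The commit title refers to replacing implicit lambda captures such as [=] and [&] with explicitly listed captures; none of the hunks excerpted below happens to include one of those edits. As orientation only, a minimal standalone illustration of the two capture styles (not code from this commit):

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  int scale = 10;
  std::vector<int> v = {3, 1, 2};

  // Implicit capture mode: [=] copies whatever the body happens to name
  // (inside a member function it would also capture `this`).
  auto times_implicit = [=](int x) { return x * scale; };

  // Explicit capture mode: every captured entity is spelled out, so the
  // closure's contents are visible at a glance and the compiler can
  // reject anything unlisted.
  auto times_explicit = [scale](int x) { return x * scale; };

  int sum = 0;
  std::for_each(v.begin(), v.end(), [&sum](int x) { sum += x; });  // explicit by-reference

  std::cout << times_implicit(2) << ' ' << times_explicit(2) << ' ' << sum << '\n';
  return 0;
}
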
1 change: 1 addition & 0 deletions cmake/operators.cmake
@@ -185,6 +185,7 @@ function(op_library TARGET)
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
list(REMOVE_ITEM hip_srcs "svd_op.cu")
list(REMOVE_ITEM hip_srcs "eigh_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
14 changes: 14 additions & 0 deletions paddle/fluid/framework/hogwild_worker.cc
@@ -216,6 +216,7 @@ void HogwildWorker::TrainFiles() {
// how to accumulate fetched values here
device_reader_->Start();
int cur_batch;
int batch_cnt = 0;
while ((cur_batch = device_reader_->Next()) > 0) {
for (auto &op : ops_) {
bool need_skip = false;
@@ -230,13 +231,26 @@ void HogwildWorker::TrainFiles() {
}
}

if (need_dump_field_) {
DumpField(*thread_scope_, dump_mode_, dump_interval_);
}
if (need_dump_param_ && thread_id_ == 0) {
DumpParam(*thread_scope_, batch_cnt);
}

total_ins_num += cur_batch;
++batch_cnt;
PrintFetchVars();
thread_scope_->DropKids();
}
timeline.Pause();
VLOG(3) << "worker " << thread_id_ << " train cost " << timeline.ElapsedSec()
<< " seconds, ins_num: " << total_ins_num;

if (need_dump_field_ || need_dump_param_) {
writer_.Flush();
}

#if defined PADDLE_WITH_PSCORE
if (thread_barrier_) {
paddle::distributed::Communicator::GetInstance()->BarrierTriggerDecrement();
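
Reduced to its shape, the hogwild change threads a batch counter through the training loop, emits the field/param dumps once per batch, and flushes the dump writer a single time after the loop ends. A self-contained sketch of that pattern (BufferedWriter and the batch vector are illustrative stand-ins, not the Paddle API):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Stand-in for the worker's buffered dump channel.
struct BufferedWriter {
  std::ostringstream buf;
  void Write(const std::string& line) { buf << line << '\n'; }
  void Flush() { std::cout << buf.str(); buf.str(""); }
};

int main() {
  const bool need_dump_field = true, need_dump_param = true;
  const int thread_id = 0;
  std::vector<int> batches = {32, 32, 17};  // fake reader output
  BufferedWriter writer;
  int batch_cnt = 0, total_ins_num = 0;
  for (int cur_batch : batches) {  // mirrors: while ((cur_batch = reader->Next()) > 0)
    if (need_dump_field) writer.Write("field dump @ batch " + std::to_string(batch_cnt));
    if (need_dump_param && thread_id == 0)
      writer.Write("param dump @ batch " + std::to_string(batch_cnt));
    total_ins_num += cur_batch;
    ++batch_cnt;
  }
  if (need_dump_field || need_dump_param) writer.Flush();  // one flush, after training
  std::cout << "ins_num: " << total_ins_num << '\n';
  return 0;
}
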
8 changes: 6 additions & 2 deletions paddle/fluid/framework/multi_trainer.cc
@@ -214,15 +214,19 @@ void MultiTrainer::Finalize() {
if (need_dump_field_ || need_dump_param_) {
FinalizeDumpEnv();
}
#ifdef PADDLE_WITH_HETERPS

for (size_t i = 0; i < need_merge_var_names_.size(); i++) {
Variable* root_var = root_scope_->FindVar(need_merge_var_names_[i]);
if (root_var == nullptr) {
continue;
}
LoDTensor* root_tensor = root_var->GetMutable<LoDTensor>();

#ifdef PADDLE_WITH_HETERPS
for (size_t j = 0; j < places_.size(); j++) {
#else
for (int j = 1; j < thread_num_; j++) {
#endif
Scope* cur_thread_scope = workers_[j]->GetThreadScope();
Variable* thread_var =
cur_thread_scope->FindVar(need_merge_var_names_[i]);
@@ -246,8 +250,8 @@ void MultiTrainer::Finalize() {
_ForEachDataType_(MergeCallback);
}
}
#ifdef PADDLE_WITH_HETERPS
MergeDenseParam();

#endif
root_scope_->DropKids();
}
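
The restructuring above narrows the #ifdef: the variable-merge loop now runs in every build, and only the set of scopes it walks (one per device place under HETERPS, worker threads 1..n otherwise) plus the final MergeDenseParam() call remain HETERPS-specific. The control flow in isolation (illustrative values, not the Paddle API):

#include <cstdio>

int main() {
#ifdef PADDLE_WITH_HETERPS
  const int begin = 0, end = 4;  // one thread scope per device place
#else
  const int begin = 1, end = 4;  // worker threads, skipping thread 0
#endif
  for (int j = begin; j < end; ++j) {
    std::printf("merge vars from scope %d into root scope\n", j);
  }
#ifdef PADDLE_WITH_HETERPS
  std::printf("MergeDenseParam()\n");  // still HETERPS-only
#endif
  return 0;
}
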
32 changes: 13 additions & 19 deletions paddle/fluid/inference/tensorrt/convert/gather_op.cc
@@ -41,33 +41,27 @@ class GatherOpConverter : public OpConverter {
std::string input_name = op_desc.Input("X").front();
std::string index_name = op_desc.Input("Index").front();
std::string output_name = op_desc.Output("Out").front();

const auto input_tensor = engine_->GetITensor(input_name);
const auto index_tensor = engine_->GetITensor(index_name);

const int axis = 0;
int axis = 0;
if (op_desc.HasAttr("axis")) {
axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
}

auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor,
*index_tensor, axis);
auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *index_tensor);

auto odim = layer->getOutput(0)->getDimensions();
nvinfer1::Dims index_shape{};
index_shape.nbDims = 1;
index_shape.d[0] = -1;

auto reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0));
reshape_layer->setReshapeDimensions(index_shape);

nvinfer1::Dims target_shape{};
target_shape.nbDims = odim.nbDims - 1;
for (int i = 0; i < axis; ++i) {
target_shape.d[i] = odim.d[i];
}
target_shape.d[axis] = 0;
for (int i = axis + 1; i < target_shape.nbDims; ++i) {
target_shape.d[i] = odim.d[i + 1];
}

reshape_layer->setReshapeDimensions(target_shape);
auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor,
*reshape_layer->getOutput(0), axis);
layer->setNbElementWiseDims(0);

RreplenishLayerAndOutput(reshape_layer, "gather", {output_name}, test_mode);
RreplenishLayerAndOutput(layer, "gather", {output_name}, test_mode);
}
};

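The converter previously gathered first and then reshaped the gather output to squeeze out the extra index dimensions; the rewritten version flattens the index tensor to 1-D before gathering, so no output reshape is needed. A sketch of the new ordering against the raw TensorRT API (it assumes a live INetworkDefinition; TRT_ENGINE_ADD_LAYER wraps the add* calls used here):

#include "NvInfer.h"

nvinfer1::ITensor* GatherWith1DIndex(nvinfer1::INetworkDefinition* network,
                                     nvinfer1::ITensor* data,
                                     nvinfer1::ITensor* index, int axis) {
  // Flatten the index tensor to 1-D first ...
  nvinfer1::Dims flat{};
  flat.nbDims = 1;
  flat.d[0] = -1;  // -1 lets TensorRT infer the flattened length
  auto* shuffle = network->addShuffle(*index);
  shuffle->setReshapeDimensions(flat);
  // ... then gather along `axis` with plain (non-batched) indices.
  auto* gather = network->addGather(*data, *shuffle->getOutput(0), axis);
  gather->setNbElementWiseDims(0);
  return gather->getOutput(0);
}
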
32 changes: 17 additions & 15 deletions paddle/fluid/inference/tensorrt/op_teller.cc
@@ -362,9 +362,15 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
}

if (op_type == "gather") {
if (!with_dynamic_shape) return false;

if (with_dynamic_shape) {
auto gather_inputs = desc.Inputs();
if (gather_inputs.find("Axis") != gather_inputs.end()) {
if (desc.Input("Axis").size() >= 1) {
return false;
}
}
if (!with_dynamic_shape) {
return false;
} else {
auto* block = desc.Block();
auto* x_var_desc = block->FindVar(desc.Input("X")[0]);
const auto x_shape = x_var_desc->GetShape();
@@ -373,13 +379,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
return false;
}
}

auto inputs = desc.InputArgumentNames();
for (auto& input : inputs) {
if (input == "Axis" && desc.Input("Axis").size() > 0) return false;
}
// current not support axis from input, use default 0
if (desc.GetAttrIfExists<int>("axis")) return false;
}

if (op_type == "gather_nd") {
@@ -1085,13 +1084,16 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
#if IS_TRT_VERSION_GE(7000)
if (op_type == "tile") {
// Paddle-TRT does not support repeat factors supplied as input tensors.
auto inputs = desc.InputArgumentNames();
for (auto& input : inputs) {
if (input == "repeat_times_tensor" &&
desc.Input("repeat_times_tensor").size() > 0)
auto tile_inputs = desc.Inputs();
if (tile_inputs.find("repeat_times_tensor") != tile_inputs.end()) {
if (desc.Input("repeat_times_tensor").size() >= 1) {
return false;
if (input == "RepeatTimes" && desc.Input("RepeatTimes").size() > 0)
}
}
if (tile_inputs.find("RepeatTimes") != tile_inputs.end()) {
if (desc.Input("RepeatTimes").size() >= 1) {
return false;
}
}
if (with_dynamic_shape) return false;
if (!with_dynamic_shape && !desc.HasAttr("repeat_times")) return false;
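
Both teller rewrites above follow the same pattern: instead of scanning desc.InputArgumentNames(), they look the optional input up in the desc.Inputs() map and only call desc.Input(name) once the key is known to exist. That guard in isolation (illustrative helper, not the Paddle API):

#include <map>
#include <string>
#include <vector>

// True only when `key` is declared on the op and bound to at least one
// argument; both conditions must hold before the op can be rejected.
bool HasNonEmptyInput(
    const std::map<std::string, std::vector<std::string>>& inputs,
    const std::string& key) {
  auto it = inputs.find(key);
  return it != inputs.end() && !it->second.empty();
}

With such a helper, the gather check above would reduce to: if (HasNonEmptyInput(desc.Inputs(), "Axis")) return false;
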
167 changes: 167 additions & 0 deletions paddle/fluid/operators/eigh_op.cc
@@ -0,0 +1,167 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/eigh_op.h"

namespace paddle {
namespace operators {

using framework::Tensor;

class EighOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Eigh");
OP_INOUT_CHECK(ctx->HasOutput("Eigenvalues"), "Output", "Eigenvalues",
"Eigh");
OP_INOUT_CHECK(ctx->HasOutput("Eigenvectors"), "Output", "Eigenvectors",
"Eigh");

auto input_dim = ctx->GetInputDim("X");
auto rank = input_dim.size();

PADDLE_ENFORCE_GE(rank, 2,
platform::errors::InvalidArgument(
"The Input(X) should have at least 2 dimensions. "
"But received a %d dimension tensor.",
rank));
PADDLE_ENFORCE_EQ(
input_dim[rank - 2], input_dim[rank - 1],
platform::errors::InvalidArgument(
"Eigh op is designed for square matrices; consequently "
"the inner-most 2 dimensions of Input(X) must be equal. "
"But received X's shape[-2] = %d and shape[-1] = %d.",
input_dim[rank - 2], input_dim[rank - 1]));

std::vector<int64_t> values_dim;
if (rank > 2) {
for (auto i = 0; i < rank - 1; i++) {
values_dim.emplace_back(input_dim[i]);
}
} else {
values_dim = {input_dim[1]};
}

ctx->SetOutputDim("Eigenvalues", framework::make_ddim(values_dim));
ctx->SetOutputDim("Eigenvectors", input_dim);
}
};

class EignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(Tensor), Hermitian or real symmetric matrices. "
"Its shape should be [*, N, N] where * is zero or "
"more batch dimensions. The data type is float32, "
"float64, complex64, complex128.");
AddOutput("Eigenvalues",
"(Tensor), The eigenvalues in ascending order. "
"The data type is float32 or float64.");
AddOutput(
"Eigenvectors",
"(Tensor), The columns are the normalized eigenvectors "
"corresponding to the eigenvalues. The data type is the same as ``X``.");
AddAttr<std::string>(
"UPLO",
"(string, default 'L'), 'L' represents the lower triangular matrix, "
"'U' represents the upper triangular matrix.")
.SetDefault("L");
AddComment(R"DOC(
Eigh Operator.
Computes the eigenvalues and eigenvectors of a complex Hermitian
(conjugate symmetric) or a real symmetric matrix.
)DOC");
}
};

class EighGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("Eigenvalues"), "Input", "Eigenvalues",
"EighGrad");
OP_INOUT_CHECK(ctx->HasInput("Eigenvectors"), "Input", "Eigenvectors",
"EighGrad");
OP_INOUT_CHECK(ctx->HasInputs(framework::GradVarName("Eigenvalues")),
"Input", "Eigenvalues@GRAD", "EighGrad");
OP_INOUT_CHECK(ctx->HasInputs(framework::GradVarName("Eigenvectors")),
"Input", "Eigenvectors@GRAD", "EighGrad");
auto dims = ctx->GetInputDim("Eigenvectors");
auto x_grad_name = framework::GradVarName("X");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, dims);
}
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Eigenvectors")),
ctx.device_context());
}
};

template <typename T>
class EighGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("Eigenvalues", this->Output("Eigenvalues"));
op->SetInput("Eigenvectors", this->Output("Eigenvectors"));
op->SetInput(framework::GradVarName("Eigenvalues"),
this->OutputGrad("Eigenvalues"));
op->SetInput(framework::GradVarName("Eigenvectors"),
this->OutputGrad("Eigenvectors"));
op->SetAttrMap(this->Attrs());
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(eigh, ops::EighOp, ops::EignOpMaker,
ops::EighGradOpMaker<paddle::framework::OpDesc>,
ops::EighGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(eigh_grad, ops::EighGradOp);

REGISTER_OP_CPU_KERNEL(
eigh, ops::EighKernel<paddle::platform::CPUDeviceContext, float, float>,
ops::EighKernel<paddle::platform::CPUDeviceContext, double, double>,
ops::EighKernel<paddle::platform::CPUDeviceContext, float,
paddle::platform::complex<float>>,
ops::EighKernel<paddle::platform::CPUDeviceContext, double,
paddle::platform::complex<double>>);

REGISTER_OP_CPU_KERNEL(
eigh_grad,
ops::EighGradKernel<paddle::platform::CPUDeviceContext, float, float>,
ops::EighGradKernel<paddle::platform::CPUDeviceContext, double, double>,
ops::EighGradKernel<paddle::platform::CPUDeviceContext, float,
paddle::platform::complex<float>>,
ops::EighGradKernel<paddle::platform::CPUDeviceContext, double,
paddle::platform::complex<double>>);
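
For reference, the factorization the operator documentation above describes: a Hermitian (or real symmetric) input decomposes with real, ascending eigenvalues and the normalized eigenvectors as the columns of V,

A = V \operatorname{diag}(\lambda)\, V^{H},
\qquad A = A^{H} \in \mathbb{C}^{N \times N},
\qquad \lambda \in \mathbb{R}^{N},\ \lambda_{1} \le \cdots \le \lambda_{N}
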
53 changes: 53 additions & 0 deletions paddle/fluid/operators/eigh_op.cu
@@ -0,0 +1,53 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/eigh_op.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename ValueType, typename T>
class EighGPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto input_var = ctx.Input<Tensor>("X");
auto output_w_var = ctx.Output<Tensor>("Eigenvalues");
auto output_v_var = ctx.Output<Tensor>("Eigenvectors");
std::string lower = ctx.Attr<std::string>("UPLO");
bool is_lower = (lower == "L");
math::MatrixEighFunctor<ValueType, T> functor;
functor(ctx, *input_var, output_w_var, output_v_var, is_lower, true);
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
eigh, ops::EighGPUKernel<float, float>, ops::EighGPUKernel<double, double>,
ops::EighGPUKernel<float, paddle::platform::complex<float>>,
ops::EighGPUKernel<double, paddle::platform::complex<double>>);

REGISTER_OP_CUDA_KERNEL(
eigh_grad,
ops::EighGradKernel<paddle::platform::CUDADeviceContext, float, float>,
ops::EighGradKernel<paddle::platform::CUDADeviceContext, double, double>,
ops::EighGradKernel<paddle::platform::CUDADeviceContext, float,
paddle::platform::complex<float>>,
ops::EighGradKernel<paddle::platform::CUDADeviceContext, double,
paddle::platform::complex<double>>);
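
The kernels register in <ValueType, T> pairs: T is the matrix element type, possibly complex, while ValueType is the matching real scalar, since eigenvalues of a Hermitian matrix are always real. A minimal illustration of why two scalar types are carried (illustrative struct, not the Paddle API):

#include <complex>
#include <vector>

// Eigenvalues of a Hermitian matrix are real even when its entries are
// complex, so results pair a real scalar type with the input type.
template <typename ValueType, typename T>
struct EighResult {
  std::vector<ValueType> eigenvalues;  // always real scalars
  std::vector<T> eigenvectors;         // same element type as the input
};

// The four pairings registered above:
template struct EighResult<float, float>;
template struct EighResult<double, double>;
template struct EighResult<float, std::complex<float>>;
template struct EighResult<double, std::complex<double>>;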