
Commit

… prim_paddle
JiabinYang committed Jan 8, 2023
2 parents a457bdb + 67fc8e9 commit ea5a9f7
Showing 40 changed files with 957 additions and 404 deletions.
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/auto_parallel/dist_attr.h
@@ -288,8 +288,8 @@ class OperatorDistAttr {
std::string impl_type_ = kDefault;
int64_t impl_idx_ = 0;
bool is_recompute_ = false;
- std::string execution_stream_;
- int64_t scheduling_priority_;  // lower value, higher priority, default to 0
+ std::string execution_stream_ = kDefault;
+ int64_t scheduling_priority_ = 0;  // lower value, higher priority
std::map<std::string, bool> annotated_;
};

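The change above gives execution_stream_ and scheduling_priority_ explicit defaults instead of leaving them uninitialized. A minimal sketch of the "lower value, higher priority" contract, assuming a scheduler that simply sorts ops by priority (illustrative only, not Paddle's actual executor):

#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

struct OpAttrSketch {
  std::string execution_stream = "default";  // stand-in for kDefault
  int64_t scheduling_priority = 0;           // default priority
};

int main() {
  std::vector<OpAttrSketch> ops = {{"default", 5}, {"default", -1}, {"default", 0}};
  // Lower value runs first, so the dispatch order here is -1, 0, 5.
  std::stable_sort(ops.begin(), ops.end(),
                   [](const OpAttrSketch& a, const OpAttrSketch& b) {
                     return a.scheduling_priority < b.scheduling_priority;
                   });
  return 0;
}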
4 changes: 4 additions & 0 deletions paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
@@ -1398,9 +1398,11 @@ void PSGPUWrapper::build_task() {

void PSGPUWrapper::BeginPass() {
platform::Timer timer;
+ #if defined(PADDLE_WITH_GPU_GRAPH) && defined(PADDLE_WITH_HETERPS)
if (FLAGS_gpugraph_storage_mode == GpuGraphStorageMode::WHOLE_HBM) {
return;
}
+ #endif
timer.Start();
if (current_task_) {
PADDLE_THROW(
@@ -1426,9 +1428,11 @@ void PSGPUWrapper::BeginPass() {
}

void PSGPUWrapper::EndPass() {
+ #if defined(PADDLE_WITH_GPU_GRAPH) && defined(PADDLE_WITH_HETERPS)
if (FLAGS_gpugraph_storage_mode == GpuGraphStorageMode::WHOLE_HBM) {
return;
}
+ #endif
platform::Timer stagetime;
stagetime.Start();
HbmToSparseTable();
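Both BeginPass and EndPass now share the same guard: a compile-time feature gate wrapped around a runtime storage-mode check, so builds without the GPU-graph feature pay no cost, and builds with it skip per-pass staging when the whole graph already lives in HBM. A hedged sketch of the pattern with stand-in names (the real pieces are FLAGS_gpugraph_storage_mode and the PADDLE_WITH_* macros):

enum class GpuGraphStorageModeSketch { WHOLE_HBM, MEM_EMBEDDING };

// Stand-in for FLAGS_gpugraph_storage_mode.
static GpuGraphStorageModeSketch g_storage_mode =
    GpuGraphStorageModeSketch::WHOLE_HBM;

void BeginPassSketch() {
#if defined(WITH_GPU_GRAPH_SKETCH)  // stand-in for the PADDLE_WITH_* gate
  if (g_storage_mode == GpuGraphStorageModeSketch::WHOLE_HBM) {
    return;  // embeddings already resident in HBM: no per-pass staging
  }
#endif
  // ... normal per-pass staging work ...
}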
2 changes: 0 additions & 2 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -254,8 +254,6 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
"constant_folding_pass", //
"auto_mixed_precision_pass", //
"conv2d_fusion_layout_transfer_pass", //
"auto_mixed_precision_pass", //
"inplace_op_var_pass", // should be the last pass.
6 changes: 6 additions & 0 deletions paddle/fluid/operators/quantize_linear_op.cc
@@ -200,6 +200,12 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(true);
AddAttr<bool>(
"only_observer",
"(bool, default false) Whether to only observer or not. If "
"only_observer=false, it will calculate fake quant or dequant output. "
"If only_observer=true, it will only calibrate scale information.")
.SetDefault(false);
AddComment(R"DOC(
The scale of QuantizeLinear operator is a vector.
In detail, each channel of the input X has a scale value.
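A hedged sketch of the only_observer semantics the new attribute describes: the scale is always calibrated, but fake quantization is applied only when only_observer is false. This is simplified to a single per-tensor scale with abs-max calibration; QuantizeLinearSketch and its helpers are illustrative, not Paddle's functors:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> QuantizeLinearSketch(const std::vector<float>& x,
                                        float* scale, int bit_length,
                                        bool only_observer) {
  // Calibrate: track the running abs-max as the scale.
  for (float v : x) *scale = std::max(*scale, std::fabs(v));
  if (only_observer) return x;  // observe only: output is a copy of the input

  const float bin_cnt = std::pow(2.0f, bit_length - 1) - 1.0f;  // 127 for int8
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    const float clipped = std::max(std::min(x[i], *scale), -*scale);
    out[i] = std::round(clipped / *scale * bin_cnt);  // fake quant
  }
  return out;
}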
39 changes: 31 additions & 8 deletions paddle/fluid/operators/quantize_linear_op.h
@@ -61,6 +61,7 @@ class QuantizeLinearKernel : public framework::OpKernel<T> {
int bin_cnt = std::pow(2, bit_length - 1) - 1;
int quant_axis = context.Attr<int>("quant_axis");
bool is_test = context.Attr<bool>("is_test");
+ bool only_observer = context.Attr<bool>("only_observer");
auto& dev_ctx = context.template device_context<DeviceContext>();

if (quant_axis < 0) {
@@ -91,23 +92,39 @@ class QuantizeLinearKernel : public framework::OpKernel<T> {
out_state,
out_accum,
out_scale);
-   ClipAndFakeQuantFunctor<DeviceContext, T>()(
-       dev_ctx, *in, *out_scale, bin_cnt, round_type, out);
+   if (only_observer) {
+     framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out);
+   } else {
+     ClipAndFakeQuantFunctor<DeviceContext, T>()(
+         dev_ctx, *in, *out_scale, bin_cnt, round_type, out);
+   }
} else {
-   ClipAndFakeQuantFunctor<DeviceContext, T>()(
-       dev_ctx, *in, *in_scale, bin_cnt, round_type, out);
+   if (only_observer) {
+     framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out);
+   } else {
+     ClipAndFakeQuantFunctor<DeviceContext, T>()(
+         dev_ctx, *in, *in_scale, bin_cnt, round_type, out);
+   }
}
} else {
if (!is_test) {
auto* out_scale = context.Output<phi::DenseTensor>("OutScale");
T* out_scale_data = out_scale->mutable_data<T>(context.GetPlace());
FindChannelAbsMaxFunctor<DeviceContext, T>()(
dev_ctx, *in, quant_axis, out_scale_data);
-   ChannelClipAndFakeQuantFunctor<DeviceContext, T>()(
-       dev_ctx, *in, *out_scale, bin_cnt, round_type, quant_axis, out);
+   if (only_observer) {
+     framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out);
+   } else {
+     ChannelClipAndFakeQuantFunctor<DeviceContext, T>()(
+         dev_ctx, *in, *out_scale, bin_cnt, round_type, quant_axis, out);
+   }
} else {
-   ChannelClipAndFakeQuantFunctor<DeviceContext, T>()(
-       dev_ctx, *in, *in_scale, bin_cnt, round_type, quant_axis, out);
+   if (only_observer) {
+     framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out);
+   } else {
+     ChannelClipAndFakeQuantFunctor<DeviceContext, T>()(
+         dev_ctx, *in, *in_scale, bin_cnt, round_type, quant_axis, out);
+   }
}
}
}
@@ -132,6 +149,12 @@ class DeQuantizeLinearKernel : public framework::OpKernel<T> {
int bit_length = context.Attr<int>("bit_length");
auto quant_axis = context.Attr<int>("quant_axis");
dev_ctx.template Alloc<D>(out, out->numel() * sizeof(D));
+ bool only_observer = context.Attr<bool>("only_observer");
+
+ if (only_observer) {
+   framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out);
+   return;
+ }

if (quant_axis < 0) {
float max_range = (std::pow(2, bit_length - 1) - 1);
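The dequantize path mirrors this: only_observer short-circuits to a copy of the input, otherwise each value is rescaled by scale / max_range with max_range = 2^(bit_length - 1) - 1, as in the per-tensor branch above. A sketch under those assumptions (illustrative names, not the real functor):

#include <cstddef>
#include <vector>

std::vector<float> DequantizeLinearSketch(const std::vector<float>& x,
                                          float scale, int bit_length,
                                          bool only_observer) {
  if (only_observer) return x;  // calibration only: identity output
  const float max_range =
      static_cast<float>((1 << (bit_length - 1)) - 1);  // 127 for int8
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    out[i] = x[i] * scale / max_range;
  }
  return out;
}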
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -676,6 +676,8 @@ void BindAnalysisConfig(py::module *m) {
py::arg("device_id") = 0,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32)
.def("exp_enable_use_cutlass", &AnalysisConfig::Exp_EnableUseCutlass)
.def("exp_disable_mixed_precision_ops",
&AnalysisConfig::Exp_DisableMixedPrecisionOps)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
.def("set_exec_stream",
[](AnalysisConfig &self, phi::CUDAStream &stream) {
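From C++, the newly bound method lives on AnalysisConfig. A hedged usage sketch; the black-list-of-op-types signature is assumed from the binding above, so check paddle_analysis_config.h for the exact form:

#include <string>
#include <unordered_set>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void ConfigureSketch() {
  paddle::AnalysisConfig config;
  config.EnableUseGpu(100 /*memory_pool_init_size_mb*/, 0 /*device_id*/);
  // Assumed: takes a set of op type names to keep out of mixed precision.
  config.Exp_DisableMixedPrecisionOps({"softmax", "layer_norm"});
}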
2 changes: 1 addition & 1 deletion paddle/phi/backends/stream.h
@@ -44,7 +44,7 @@ class Stream {
using Callback = std::function<void()>;

Stream() = default;
- // For compatiable
+ // For compatible
Stream(const Place& place, stream_t stream);
~Stream();
const stream_t& raw_stream() const;
4 changes: 4 additions & 0 deletions paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -66,6 +66,10 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::INT64})},
{"bilinear_interp_v2", XPUKernelSet({phi::DataType::FLOAT32})},
{"bilinear_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"bitwise_and", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_not", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_or", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_xor", XPUKernelSet({phi::DataType::BOOL})},
{"broadcast", XPUKernelSet({phi::DataType::FLOAT32})},
{"c_allgather",
XPUKernelSet({phi::DataType::FLOAT16,
59 changes: 33 additions & 26 deletions paddle/phi/infermeta/unary.cc
@@ -1785,20 +1785,22 @@ void KthvalueInferMeta(const MetaTensor& x,
MetaConfig config) {
auto input_dims = x.dims();
const int& dim_size = input_dims.size();
- PADDLE_ENFORCE_LT(axis,
+ PADDLE_ENFORCE_LE(axis,
dim_size,
phi::errors::InvalidArgument(
"the axis must be [-%d, %d), but received %d .",
dim_size,
dim_size,
axis));
-   PADDLE_ENFORCE_GE(axis,
-       -dim_size,
-       phi::errors::InvalidArgument(
-           "the axis must be [-%d, %d), but received %d .",
-           dim_size,
-           dim_size,
-           axis));
+   if (dim_size > 0) {
+     PADDLE_ENFORCE_GE(axis,
+         -dim_size,
+         phi::errors::InvalidArgument(
+             "the axis must be [-%d, %d), but received %d .",
+             dim_size,
+             dim_size,
+             axis));
+   }
if (axis < 0) axis += dim_size;
PADDLE_ENFORCE_GE(
k,
@@ -1807,9 +1809,9 @@ void KthvalueInferMeta(const MetaTensor& x,
"the k in the kthvalue must >= 1, but received %d .", k));
PADDLE_ENFORCE_GE(
input_dims.size(),
-     1,
-     phi::errors::InvalidArgument("input of kthvalue must have >= 1d shape"));
- if (config.is_runtime) {
+     0,
+     phi::errors::InvalidArgument("input of kthvalue must have >= 0d shape"));
+ if (dim_size > 0 && config.is_runtime) {
PADDLE_ENFORCE_GE(
input_dims[axis],
k,
Expand All @@ -1822,7 +1824,7 @@ void KthvalueInferMeta(const MetaTensor& x,
for (int64_t i = 0; i < axis; i++) {
dimvec.emplace_back(input_dims[i]);
}
- if (keepdim) {
+ if (keepdim && dim_size > 0) {
dimvec.emplace_back(static_cast<int64_t>(1));
}
for (int64_t i = axis + 1; i < dim_size; i++) {
@@ -2071,33 +2073,38 @@ void ModeInferMeta(const MetaTensor& x,
MetaTensor* indices) {
auto input_dims = x.dims();
const int& dim_size = input_dims.size();
-   PADDLE_ENFORCE_EQ(
-       (axis < dim_size) && (axis >= (-1 * dim_size)),
-       true,
-       errors::InvalidArgument(
-           "the axis of ModeOp must be [-%d, %d), but you set axis is %d",
-           dim_size,
-           dim_size,
-           axis));
+   PADDLE_ENFORCE_LE(axis,
+       dim_size,
+       phi::errors::InvalidArgument(
+           "the axis must be [-%d, %d), but received %d .",
+           dim_size,
+           dim_size,
+           axis));
+   if (dim_size > 0) {
+     PADDLE_ENFORCE_GE(axis,
+         -dim_size,
+         phi::errors::InvalidArgument(
+             "the axis must be [-%d, %d), but received %d .",
+             dim_size,
+             dim_size,
+             axis));
+   }
PADDLE_ENFORCE_GE(
input_dims.size(),
-     1,
-     errors::InvalidArgument("input of ModeOp must have >= 1d shape"));
+     0,
+     errors::InvalidArgument("input of ModeOp must have >= 0d shape"));
if (axis < 0) axis += dim_size;
std::vector<int64_t> dimvec;
for (int64_t i = 0; i < axis; i++) {
dimvec.emplace_back(input_dims[i]);
}
- if (keepdim) {
+ if (keepdim && dim_size > 0) {
dimvec.emplace_back(static_cast<int64_t>(1));
}
for (int64_t i = axis + 1; i < dim_size; i++) {
dimvec.emplace_back(input_dims[i]);
}
DDim dims = phi::make_ddim(dimvec);
- PADDLE_ENFORCE_GE(input_dims.size(),
-     1,
-     errors::InvalidArgument("input shape should >= 1d"));
out->set_dims(dims);
out->share_lod(x);
out->set_dtype(x.dtype());
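The net effect of these InferMeta changes is that kthvalue and mode now accept 0-D inputs: the reduced axis is dropped (or kept as size 1 under keepdim, only when the input has rank > 0), so a 0-D input yields a 0-D output instead of failing the old ">= 1d" check. A standalone sketch of that shape rule:

#include <cstdint>
#include <vector>

std::vector<int64_t> ReducedDimsSketch(const std::vector<int64_t>& in_dims,
                                       int axis, bool keepdim) {
  const int dim_size = static_cast<int>(in_dims.size());
  if (axis < 0) axis += dim_size;  // normalize a negative axis
  std::vector<int64_t> out;
  for (int i = 0; i < axis; ++i) out.push_back(in_dims[i]);
  if (keepdim && dim_size > 0) out.push_back(1);
  for (int i = axis + 1; i < dim_size; ++i) out.push_back(in_dims[i]);
  return out;
}
// ReducedDimsSketch({2, 3, 4}, 1, false) -> {2, 4}
// ReducedDimsSketch({2, 3, 4}, 1, true)  -> {2, 1, 4}
// ReducedDimsSketch({}, 0, false)        -> {} (0-D in, 0-D out)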
10 changes: 9 additions & 1 deletion paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc
@@ -55,6 +55,14 @@ void KthvalueGradKernel(const Context& dev_ctx,
DenseTensor* d_x) {
auto in_dims = x.dims();
auto out_dims = indices.dims();
+ T* x_grad_data = dev_ctx.template Alloc<T>(d_x);
+
+ // For 0D Tensor
+ if (in_dims.size() == 0) {
+   phi::funcs::set_constant(dev_ctx, d_x, 1.0);
+   return;
+ }

axis = (axis < 0) ? (in_dims.size() + axis) : axis;
if (!keepdim) {
std::vector<int> tmp_out_shape;
@@ -67,7 +75,7 @@ void KthvalueGradKernel(const Context& dev_ctx,
}
out_dims = phi::make_ddim(tmp_out_shape);
}
- T* x_grad_data = dev_ctx.template Alloc<T>(d_x);

if (axis == in_dims.size() - 1) {
const int64_t input_height =
phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1));
14 changes: 14 additions & 0 deletions paddle/phi/kernels/cpu/kthvalue_kernel.cc
@@ -82,8 +82,22 @@ void KthvalueKernel(const Context& dev_ctx,
DenseTensor* indices) {
const auto& in_dims = x.dims();
if (axis < 0) axis += in_dims.size();

T* output_data = dev_ctx.template Alloc<T>(output);
int64_t* indices_data = dev_ctx.template Alloc<int64_t>(indices);
+ // For 0D Tensor
+ if (in_dims.size() == 0) {
+   PADDLE_ENFORCE_EQ(k,
+       1,
+       phi::errors::InvalidArgument(
+           "the k in the kthvalue must be less than or equal to the "
+           "number of elements of the input X, but received %d .",
+           k));
+
+   phi::Copy<Context>(dev_ctx, x, dev_ctx.GetPlace(), false, output);
+   phi::funcs::set_constant(dev_ctx, indices, 0);
+   return;
+ }
auto out_dims = output->dims();
if (axis == in_dims.size() - 1) {
const int64_t& input_height =
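For the 0-D fast paths added in these CPU kernels: a scalar has exactly one element, so only k == 1 is valid, the forward result is the value itself with index 0, and the backward pass fills the gradient buffer with the identity derivative of 1. A worked sketch of that contract (plain scalars standing in for 0-D tensors):

#include <cassert>
#include <cstdint>

float KthvalueScalarSketch(float x, int k, int64_t* index) {
  assert(k == 1);  // a 0-D tensor has exactly one element
  *index = 0;
  return x;        // forward is the identity
}

float KthvalueScalarGradSketch() {
  return 1.0f;     // identity op: the gradient buffer is filled with 1
}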
10 changes: 9 additions & 1 deletion paddle/phi/kernels/cpu/mode_grad_kernel.cc
@@ -17,6 +17,7 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/mode.h"

namespace phi {
@@ -32,9 +33,17 @@ void ModeGradKernel(const Context& dev_ctx,
auto in_dims = x.dims();
auto out_dims = indices.dims();

+ T* x_grad_data = dev_ctx.template Alloc<T>(x_grad);

// axis < 0, get the real axis
axis = (axis < 0) ? (in_dims.size() + axis) : axis;

+ // For 0D Tensor
+ if (in_dims.size() == 0) {
+   phi::funcs::set_constant(dev_ctx, x_grad, 1.0);
+   return;
+ }

if (!keepdim) {
std::vector<int> tmp_out_shape;
for (int i = 0; i < axis; i++) {
@@ -46,7 +55,6 @@ void ModeGradKernel(const Context& dev_ctx,
}
out_dims = phi::make_ddim(tmp_out_shape);
}
- T* x_grad_data = dev_ctx.template Alloc<T>(x_grad);

if (axis == in_dims.size() - 1) {
// allocate the memory for the input_grad
8 changes: 8 additions & 0 deletions paddle/phi/kernels/cpu/mode_kernel.cc
@@ -16,6 +16,7 @@

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/mode.h"

namespace phi {
@@ -34,6 +35,13 @@ void ModeKernel(const Context& dev_ctx,

T* output_data = dev_ctx.template Alloc<T>(out);
int64_t* indices_data = dev_ctx.template Alloc<int64_t>(indices);

+ if (in_dims.size() == 0) {
+   phi::Copy<Context>(dev_ctx, x, dev_ctx.GetPlace(), false, out);
+   phi::funcs::set_constant(dev_ctx, indices, 0);
+   return;
+ }

// if axis is not the last dim, transpose it to the last dim, do the
// calculation, then transpose it back to original axis.
if (axis == in_dims.size() - 1) {
10 changes: 9 additions & 1 deletion paddle/phi/kernels/gpu/kthvalue_grad_kernel.cu
@@ -16,6 +16,7 @@

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/top_k_function_cuda.h"

namespace phi {
@@ -43,8 +44,15 @@ void KthvalueGradKernel(const Context& dev_ctx,
DenseTensor* d_x) {
const auto& in_dims = x.dims();
auto out_dims = indices.dims();
- if (axis < 0) axis += in_dims.size();
T* x_grad_data = dev_ctx.template Alloc<T>(d_x);
+ // For 0D Tensor
+ if (in_dims.size() == 0) {
+   phi::funcs::set_constant(dev_ctx, d_x, 1.0);
+   return;
+ }
+
+ if (axis < 0) axis += in_dims.size();

const T* out_grad_data = d_out.data<T>();
const int64_t* indices_data = indices.data<int64_t>();
int pre, n, post;
… (remaining changed files not shown)
