
Commit

… variable-keeping-with-ensor
CtfGo committed Jun 16, 2021
2 parents 67837ee + 32e3353 commit 1850dfc
Showing 63 changed files with 1,488 additions and 538 deletions.
30 changes: 22 additions & 8 deletions cmake/external/lite.cmake
@@ -18,13 +18,21 @@ if(NOT LINUX)
return()
endif()

if(XPU_SDK_ROOT)
set(LITE_WITH_XPU ON)
include_directories("${XPU_SDK_ROOT}/XTDK/include")
include_directories("${XPU_SDK_ROOT}/XTCL/include")
if (LITE_WITH_XPU)
add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
IF(WITH_AARCH64)
SET(XPU_SDK_ENV "kylin_aarch64")
ELSEIF(WITH_SUNWAY)
SET(XPU_SDK_ENV "deepin_sw6_64")
ELSEIF(WITH_BDCENTOS)
SET(XPU_SDK_ENV "bdcentos_x86_64")
ELSEIF(WITH_UBUNTU)
SET(XPU_SDK_ENV "ubuntu_x86_64")
ELSEIF(WITH_CENTOS)
SET(XPU_SDK_ENV "centos7_x86_64")
ELSE ()
SET(XPU_SDK_ENV "ubuntu_x86_64")
ENDIF()
endif()

if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DXPU_SDK_URL=${XPU_SDK_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=OFF)

@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
include_directories(${LITE_SOURCE_DIR})
include_directories(${LITE_BINARY_DIR})
if(LITE_WITH_XPU)
include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xdnn/include/)
include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xre/include/)
endif()

function(external_lite_libs alias path)
add_library(${alias} SHARED IMPORTED GLOBAL)
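Taken together, the lite.cmake hunks drop the requirement for a locally installed XPU_SDK_ROOT: an SDK flavour string (XPU_SDK_ENV) is derived from the platform switches and, together with a base URL, handed to the external Paddle-Lite build, which downloads the SDK itself. Below is a minimal, hedged sketch of that pattern; the target name, repository URL, and mirror URL are placeholders, and only the variable names XPU_SDK_ENV, XPU_SDK_URL, XPU_BASE_URL and the platform switches come from the hunks above.

```cmake
# Sketch only (assumptions flagged inline): pick an XPU SDK flavour from a
# platform switch and forward the download location to a sub-build, mirroring
# the cmake/external/lite.cmake hunks above.
include(ExternalProject)

set(XPU_BASE_URL "https://example.com/klsdk-dev/20210527")  # placeholder mirror

if(WITH_AARCH64)
  set(XPU_SDK_ENV "kylin_aarch64")
else()
  set(XPU_SDK_ENV "ubuntu_x86_64")  # default flavour, as in the hunk above
endif()

ExternalProject_Add(example_lite                                    # hypothetical target name
  GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"  # placeholder source
  CMAKE_ARGS     -DLITE_WITH_XPU=ON
                 -DXPU_SDK_URL=${XPU_BASE_URL}   # SDK is downloaded, not read from XPU_SDK_ROOT
                 -DXPU_SDK_ENV=${XPU_SDK_ENV}
  INSTALL_COMMAND "")                            # skip install for this sketch
```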
11 changes: 5 additions & 6 deletions cmake/external/xpu.cmake
@@ -33,7 +33,10 @@ ELSE ()
SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
ENDIF()

SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
IF(NOT XPU_BASE_URL)
SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
ENDIF()

SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
SET(XPU_XCCL_URL "${XPU_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
@@ -93,11 +96,7 @@ ELSE(WITH_XPU_BKCL)
TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
ENDIF(WITH_XPU_BKCL)

if(NOT XPU_SDK_ROOT)
ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
else()
ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
endif()
ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})

# Ensure that xpu/api.h can be included without dependency errors.
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")
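With XPU_BASE_URL now defaulted only when unset, a build can point the Kunlun SDK download at a different snapshot without editing xpu.cmake. A hedged usage sketch follows; the alternate URL and the include path are illustrative, not part of the commit.

```cmake
# Option 1: override at configure time; the IF(NOT XPU_BASE_URL) guard above
# then skips the hard-coded default.
#   cmake .. -DWITH_XPU=ON -DXPU_BASE_URL=https://example.com/klsdk-dev/20210601
#
# Option 2: pre-set the variable before the module is included, e.g. from a
# wrapper CMakeLists.txt (URL and path are placeholders).
set(XPU_BASE_URL "https://example.com/klsdk-dev/20210601")
include(${CMAKE_CURRENT_LIST_DIR}/cmake/external/xpu.cmake)
```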
3 changes: 0 additions & 3 deletions paddle/fluid/framework/ir/op_compat_sensible_pass.cc
@@ -75,9 +75,6 @@ AttrCompat& AttrCompat::IsLeftDefault() {
}

bool AttrCompat::operator()(const OpDesc& op_desc) {
if (conditions_.empty()) {
return true;
}
if (!op_desc.HasAttr(attr_name_)) {
if (!optional_) {
LOG(WARNING) << "The non-optional Attr(" << attr_name_ << ") of Op ("
2 changes: 1 addition & 1 deletion paddle/fluid/inference/tensorrt/op_teller.cc
@@ -694,7 +694,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
return false;
// Paddle-TRT does not support the input tensors: Shape and ShapeTensor
} else if (desc.Input("Shape").size() >= 1 ||
desc.Input("ShapeTensor").size() >= 1 || with_dynamic_shape) {
desc.Input("ShapeTensor").size() >= 1) {
return false;
} else {
std::vector<int> shape =
62 changes: 1 addition & 61 deletions paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu
@@ -182,69 +182,9 @@ int LayerNormPluginDynamic::enqueue(
paddle::operators::LayerNormDirectCUDAFunctor<float> layer_norm;
layer_norm(stream, input, input_shape, bias_d, scale_d, output, mean_d,
variance_d, begin_norm_axis, eps);
} else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef TRT_PLUGIN_FP16_AVALIABLE
VLOG(1) << "TRT Plugin DataType selected. LayerNorm-->fp16";
const half *input = reinterpret_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]);
size_t mean_shape_product = 1;
for (auto s : mean_shape_) {
mean_shape_product *= s;
}
size_t variance_shape_product = 1;
for (auto s : variance_shape_) {
variance_shape_product *= s;
}
if (!scale_gpu_half_d_) {
cudaMalloc(&scale_gpu_half_d_, feature_size * sizeof(half));
}
if (!bias_gpu_half_d_) {
cudaMalloc(&bias_gpu_half_d_, feature_size * sizeof(half));
}
if (!mean_gpu_half_d_) {
cudaMalloc(&mean_gpu_half_d_, mean_shape_product * sizeof(half));
}
if (!variance_gpu_half_d_) {
cudaMalloc(&variance_gpu_half_d_, variance_shape_product * sizeof(half));
}

half *scale_cpu_half =
static_cast<half *>(malloc(feature_size * sizeof(half)));
half *bias_cpu_half =
static_cast<half *>(malloc(feature_size * sizeof(half)));
PADDLE_ENFORCE_EQ(
scale_cpu_half && bias_cpu_half, true,
platform::errors::Unavailable("Out of memory, malloc size %d.",
feature_size * sizeof(half)));

for (int i = 0; i < feature_size; i++) {
scale_cpu_half[i] = static_cast<half>(scale_[i]);
bias_cpu_half[i] = static_cast<half>(bias_[i]);
}
cudaMemcpyAsync(scale_gpu_half_d_, scale_cpu_half,
sizeof(half) * feature_size, cudaMemcpyHostToDevice,
stream);
cudaMemcpyAsync(bias_gpu_half_d_, bias_cpu_half,
sizeof(half) * feature_size, cudaMemcpyHostToDevice,
stream);
free(scale_cpu_half);
free(bias_cpu_half);

paddle::operators::LayerNormDirectCUDAFunctor<half> layer_norm;
layer_norm(stream, input, input_shape, bias_gpu_half_d_, scale_gpu_half_d_,
output, mean_gpu_half_d_, variance_gpu_half_d_, begin_norm_axis,
eps);
#else
PADDLE_THROW(platform::errors::Fatal(
"The layer_norm tensorRT plugin should be "
"complied with CUDA version >= 10.0 when running with fp16. "
"Please recomplie it or try to use fp32 by set "
"config.SetTRTDynamicShapeInfo(min_input_shape, "
"max_input_shape, opt_input_shape, true"));
#endif
} else {
PADDLE_THROW(platform::errors::Fatal(
"The LayerNorm TRT Plugin's input type should be float or half."));
"The LayerNorm TRT Plugin's input type should be float."));
}
return cudaGetLastError() != cudaSuccess;
}
31 changes: 2 additions & 29 deletions paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
@@ -114,22 +114,14 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT {
: begin_norm_axis_(begin_norm_axis),
eps_(eps),
mean_shape_(mean_shape),
variance_shape_(variance_shape),
scale_gpu_half_d_(nullptr),
bias_gpu_half_d_(nullptr),
mean_gpu_half_d_(nullptr),
variance_gpu_half_d_(nullptr) {
variance_shape_(variance_shape) {
bias_.resize(bias_num);
scale_.resize(scale_num);
std::copy(bias, bias + bias_num, bias_.data());
std::copy(scale, scale + scale_num, scale_.data());
}

LayerNormPluginDynamic(void const* serialData, size_t serialLength)
: scale_gpu_half_d_(nullptr),
bias_gpu_half_d_(nullptr),
mean_gpu_half_d_(nullptr),
variance_gpu_half_d_(nullptr) {
LayerNormPluginDynamic(void const* serialData, size_t serialLength) {
DeserializeValue(&serialData, &serialLength, &bias_);
DeserializeValue(&serialData, &serialLength, &scale_);
DeserializeValue(&serialData, &serialLength, &begin_norm_axis_);
@@ -190,21 +182,6 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT {
const nvinfer1::DataType* inputTypes,
int nbInputs) const override;

~LayerNormPluginDynamic() {
if (scale_gpu_half_d_) {
cudaFree(scale_gpu_half_d_);
}
if (bias_gpu_half_d_) {
cudaFree(bias_gpu_half_d_);
}
if (mean_gpu_half_d_) {
cudaFree(mean_gpu_half_d_);
}
if (variance_gpu_half_d_) {
cudaFree(variance_gpu_half_d_);
}
}

void destroy() override { delete this; }

private:
@@ -218,10 +195,6 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT {
float eps_;
std::vector<int64_t> mean_shape_;
std::vector<int64_t> variance_shape_;
half* scale_gpu_half_d_;
half* bias_gpu_half_d_;
half* mean_gpu_half_d_;
half* variance_gpu_half_d_;
};

class LayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator {
30 changes: 15 additions & 15 deletions paddle/fluid/operators/compat/batch_norm.pbtxt
@@ -18,6 +18,21 @@ def {
outputs {
name: "Y"
}
outputs {
name: "MeanOut"
}
outputs {
name: "VarianceOut"
}
outputs {
name: "SavedMean"
}
outputs {
name: "SavedVariance"
}
outputs {
name: "ReserveSpace"
}
attrs {
name: "epsilon"
type: FLOAT
@@ -55,21 +70,6 @@ extra {
name: "trainable_statistics"
type: BOOLEAN
}
outputs {
name: "MeanOut"
}
outputs {
name: "VarianceOut"
}
outputs {
name: "SavedMean"
}
outputs {
name: "SavedVariance"
}
outputs {
name: "ReserveSpace"
}
attrs {
name: "op_role"
type: INT
8 changes: 4 additions & 4 deletions paddle/fluid/operators/compat/conv2d.pbtxt
@@ -32,6 +32,10 @@ def {
name: "dilations"
type: INTS
}
attrs {
name: "data_format"
type: STRING
}
}
extra {
inputs {
@@ -113,10 +117,6 @@ extra {
name: "force_fp32_output"
type: BOOLEAN
}
attrs {
name: "data_format"
type: STRING
}
attrs {
name: "workspace_size_MB"
type: INT
10 changes: 5 additions & 5 deletions paddle/fluid/operators/compat/conv2d_transpose.pbtxt
@@ -1,4 +1,4 @@
type: "reduce_mean"
type: "conv2d_transpose"
def {
inputs {
name: "Input"
@@ -40,6 +40,10 @@ def {
name: "padding_algorithm"
type: STRING
}
attrs {
name: "data_format"
type: STRING
}
}
extra {
attrs {
@@ -78,10 +82,6 @@ extra {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "data_format"
type: STRING
}
attrs {
name: "workspace_size_MB"
type: INT