Commit
… nanquantile
Asthestarsfalll committed Apr 13, 2022
2 parents 31e1734 + fe214af commit 4669518
Showing 122 changed files with 5,910 additions and 728 deletions.
46 changes: 31 additions & 15 deletions cmake/external/xpu.cmake
@@ -7,46 +7,62 @@ SET(XPU_PROJECT "extern_xpu")
 SET(XPU_API_LIB_NAME "libxpuapi.so")
 SET(XPU_RT_LIB_NAME "libxpurt.so")
 
+if(NOT DEFINED XPU_BASE_URL)
+  SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
+  SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220411")
+else()
+  SET(XPU_BASE_URL "${XPU_BASE_URL}")
+endif()
+
+# ubuntu and centos: use output by XDNN API team
+if(NOT DEFINED XPU_XDNN_BASE_URL)
+  SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
+  SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220412")
+else()
+  SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
+endif()
+
 IF(WITH_AARCH64)
   SET(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
   SET(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
   SET(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64")
+  SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_SUNWAY)
   SET(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
   SET(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
   SET(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64")
+  SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_BDCENTOS)
   SET(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # ubuntu and centos: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_UBUNTU)
   SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-ubuntu_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # ubuntu and centos: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_CENTOS)
   SET(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # ubuntu and centos: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
-ELSE ()
+ELSE()
   SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # default: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ENDIF()
 
-if(NOT DEFINED XPU_BASE_URL)
-  SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220411")
-else()
-  SET(XPU_BASE_URL "${XPU_BASE_URL}")
-endif()
-
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
-SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220411/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_PACK_DEPENCE_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" CACHE STRING "" FORCE)
 
 SET(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
 SET(XPU_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
4 changes: 2 additions & 2 deletions cmake/xpu_kp.cmake
@@ -128,7 +128,7 @@ macro(compile_kernel COMPILE_ARGS)
     COMMAND
     ${CMAKE_COMMAND} -E make_directory kernel_build
     COMMAND
-    cp ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu -rf
+    ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu
     COMMAND
     ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=1 ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES}
     -I. -o kernel_build/${kernel_name}.bin.o.sec kernel_build/${kernel_name}.xpu
@@ -151,7 +151,7 @@ macro(compile_kernel COMPILE_ARGS)
     COMMAND
     ${CMAKE_COMMAND} -E make_directory kernel_build
     COMMAND
-    cp ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu -rf
+    ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu
     COMMAND
     ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=1 ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES}
     -I. -o kernel_build/${kernel_name}.host.o kernel_build/${kernel_name}.xpu
9 changes: 5 additions & 4 deletions paddle/fluid/distributed/collective/Common.cc
@@ -41,13 +41,14 @@ std::string GetKeyFromPlaces(const std::vector<Place>& places) {
 }
 
 static bool CheckTensorsInPlace(const std::vector<Tensor>& tensors,
-                                const PlaceType type) {
-  return std::all_of(tensors.cbegin(), tensors.cend(),
-                     [&](const Tensor& t) { return t.place() == type; });
+                                phi::AllocationType type) {
+  return std::all_of(tensors.cbegin(), tensors.cend(), [&](const Tensor& t) {
+    return t.place().GetType() == type;
+  });
 }
 
 bool CheckTensorsInCudaPlace(const std::vector<Tensor>& tensors) {
-  return CheckTensorsInPlace(tensors, PlaceType::kGPU);
+  return CheckTensorsInPlace(tensors, phi::AllocationType::GPU);
 }
 
 }  // namespace distributed
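
Note: the change above swaps the deprecated PlaceType enum for phi::AllocationType and compares backends through Place::GetType(). A minimal standalone sketch of the same check, assuming only the public phi::Place interface (AllOfType is an illustrative name, not part of this commit):

#include <algorithm>
#include <vector>

#include "paddle/phi/common/place.h"

// Illustrative helper: true when every place lives on the given backend.
static bool AllOfType(const std::vector<phi::Place>& places,
                      phi::AllocationType type) {
  return std::all_of(places.cbegin(), places.cend(),
                     [&](const phi::Place& p) { return p.GetType() == type; });
}

// AllOfType(places, phi::AllocationType::GPU) then mirrors the
// CheckTensorsInCudaPlace() call above.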
7 changes: 0 additions & 7 deletions paddle/fluid/distributed/collective/reducer.cc
@@ -414,20 +414,13 @@ void EagerReducer::InitializeDenseGroups(
     p_group->dense_tensors_.push_back(phi::DenseTensor());
 
     const auto &dtype = tensor.dtype();
-    const auto &place = tensor.place();
     const auto &inner_place = tensor.impl()->place();
     if (index > 0) {
       PADDLE_ENFORCE_EQ(dtype, p_group->dtype_,
                         platform::errors::PreconditionNotMet(
                             "Tensor %s has unexpected dtype.", tensor_name));
-      PADDLE_ENFORCE_EQ(place, place_,
-                        platform::errors::PreconditionNotMet(
-                            "Tensor %s has different place. Expected place is "
-                            "%s, but actual place is %s",
-                            tensor_name, inner_place_, inner_place));
     } else {
       p_group->dtype_ = dtype;
-      place_ = place;
       inner_place_ = inner_place;
     }
   }
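
Note: the deleted PADDLE_ENFORCE_EQ became redundant once place_ was dropped, since inner_place_ already carries both the backend kind and the device id. A hedged sketch of that distinction, assuming the public phi place types:

#include <cassert>

#include "paddle/phi/common/place.h"

int main() {
  phi::GPUPlace gpu0(0), gpu1(1);
  // Same backend kind (phi::AllocationType::GPU)...
  assert(gpu0.GetType() == gpu1.GetType());
  // ...but full place equality also compares the device id.
  assert(!(gpu0 == gpu1));
  return 0;
}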
2 changes: 0 additions & 2 deletions paddle/fluid/distributed/collective/reducer.h
@@ -26,7 +26,6 @@
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/phi/api/include/api.h"
 #include "paddle/phi/api/include/tensor.h"
-#include "paddle/phi/api/lib/ext_compat_utils.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/utils/string/string_helper.h"
@@ -121,7 +120,6 @@ class EagerReducer {
 
   std::vector<EagerGroup> groups_;
   std::vector<TensorLocator> variable_locators_;
-  PlaceType place_;
   platform::Place inner_place_;
   size_t next_group_ = 0;
   int64_t nranks_ = -1;
9 changes: 4 additions & 5 deletions paddle/fluid/framework/custom_operator.cc
@@ -36,7 +36,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/dynamic_loader.h"
 #include "paddle/fluid/string/string_helper.h"
 #include "paddle/phi/api/all.h"
-#include "paddle/phi/api/lib/ext_compat_utils.h"
 #include "paddle/phi/api/lib/utils/tensor_utils.h"
 #include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/utils/any.h"
@@ -627,8 +626,8 @@ class CustomGradOpMaker<imperative::OpBase>
 static void RegisterOperatorKernelWithPlace(
     const std::string& name,
     const OperatorWithKernel::OpKernelFunc& op_kernel_func,
-    const proto::VarType::Type type, const PlaceType& place) {
-  OpKernelType key(type, experimental::ConvertExtPlaceToInnerPlace(place));
+    const proto::VarType::Type type, const platform::Place& place) {
+  OpKernelType key(type, place);
   VLOG(3) << "Custom Operator: op kernel key: " << key;
   OperatorWithKernel::AllOpKernels()[name][key] = op_kernel_func;
 }
@@ -666,10 +665,10 @@ static void RegisterOperatorKernel(const std::string& name,
     op_kernel_func = func;
   }
   RegisterOperatorKernelWithPlace(name, op_kernel_func, proto::VarType::RAW,
-                                  PlaceType::kCPU);
+                                  platform::CPUPlace());
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   RegisterOperatorKernelWithPlace(name, op_kernel_func, proto::VarType::RAW,
-                                  PlaceType::kGPU);
+                                  platform::CUDAPlace());
 #endif
 }
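Note: custom-op kernels are now keyed directly on a platform::Place, so the CPU and GPU registrations differ only in the place value passed in. A self-contained sketch of the keyed-registry idea (simplified stand-in types, not Paddle's actual OpKernelType):

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <utility>

// Simplified stand-ins for proto::VarType::Type and platform::Place.
enum class DataType { RAW };
enum class Backend { CPU, GPU };

using KernelKey = std::pair<DataType, Backend>;
using KernelFn = std::function<void()>;

// One registry per op name, keyed by (dtype, place), like AllOpKernels().
static std::map<std::string, std::map<KernelKey, KernelFn>> registry;

void RegisterKernelWithPlace(const std::string& name, KernelFn fn,
                             DataType type, Backend place) {
  registry[name][{type, place}] = std::move(fn);
}

int main() {
  RegisterKernelWithPlace("custom_relu", [] { std::cout << "cpu\n"; },
                          DataType::RAW, Backend::CPU);
  RegisterKernelWithPlace("custom_relu", [] { std::cout << "gpu\n"; },
                          DataType::RAW, Backend::GPU);
  registry["custom_relu"][{DataType::RAW, Backend::CPU}]();  // prints "cpu"
  return 0;
}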
26 changes: 25 additions & 1 deletion paddle/fluid/framework/fleet/heter_context.h
100755 → 100644
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #ifdef PADDLE_WITH_HETERPS
 
+#include <ThreadPool.h>
 #include <algorithm>
 #include <map>
 #include <unordered_map>
@@ -38,7 +39,7 @@ namespace framework {
 
 class HeterContext {
  public:
-  ~HeterContext() {
+  virtual ~HeterContext() {
    if (!multi_mf_dim_) {
      for (size_t i = 0; i < mutex_.size(); ++i) {
        delete mutex_[i];
@@ -56,9 +57,12 @@ class HeterContext {
   Scope* scope_{nullptr};
   std::vector<std::vector<FeatureKey>> feature_keys_;
   std::vector<std::vector<std::vector<FeatureKey>>> feature_dim_keys_;
+  std::vector<std::vector<std::vector<FeatureKey>>> device_task_keys_;
 
 #ifdef PADDLE_WITH_PSLIB
   std::vector<std::vector<paddle::ps::DownpourFixedFeatureValue*>> value_ptr_;
+  std::vector<std::vector<std::vector<paddle::ps::DownpourFixedFeatureValue*>>>
+      device_task_ptr_;
   std::vector<std::vector<std::vector<paddle::ps::DownpourFixedFeatureValue*>>>
       value_dim_ptr_;
   std::vector<std::vector<std::vector<paddle::ps::DownpourFixedFeatureValue*>>>
@@ -68,6 +72,8 @@ class HeterContext {
   std::vector<std::vector<paddle::distributed::FixedFeatureValue*>> value_ptr_;
   std::vector<std::vector<std::vector<paddle::distributed::FixedFeatureValue*>>>
       value_dim_ptr_;
+  std::vector<std::vector<std::vector<paddle::distributed::FixedFeatureValue*>>>
+      device_task_ptr_;
   std::vector<std::vector<std::vector<paddle::distributed::FixedFeatureValue*>>>
       device_dim_ptr_;
 #endif
@@ -93,6 +99,12 @@ class HeterContext {
     shard_num_ = shard_num;
     feature_keys_.resize(shard_num_);
     value_ptr_.resize(shard_num_);
+    device_task_ptr_.resize(shard_num_);
+    device_task_keys_.resize(shard_num_);
+    for (size_t i = 0; i < device_task_ptr_.size(); i++) {
+      device_task_ptr_[i].resize(device_num);
+      device_task_keys_[i].resize(device_num);
+    }
 
     device_values_.resize(device_num);
     device_keys_.resize(device_num);
@@ -108,6 +120,12 @@ class HeterContext {
     feature_dim_keys_.resize(shard_num_);
     value_ptr_.resize(shard_num_);
     value_dim_ptr_.resize(shard_num_);
+    device_task_ptr_.resize(shard_num_);
+    device_task_keys_.resize(shard_num_);
+    for (size_t i = 0; i < device_task_ptr_.size(); i++) {
+      device_task_ptr_[i].resize(device_num);
+      device_task_keys_[i].resize(device_num);
+    }
     for (size_t i = 0; i < feature_dim_keys_.size(); i++) {
       feature_dim_keys_[i].resize(dim_num);
       value_dim_ptr_[i].resize(dim_num);
@@ -151,6 +169,12 @@ class HeterContext {
     for (size_t i = 0; i < device_keys_.size(); ++i) {
       device_keys_[i].clear();
     }
+    for (size_t i = 0; i < device_task_ptr_.size(); ++i) {
+      for (size_t j = 0; j < device_task_ptr_[i].size(); ++j) {
+        device_task_ptr_[i][j].clear();
+        device_task_keys_[i][j].clear();
+      }
+    }
   } else {
     VLOG(3) << "Reset gpu task with dynamic mf dimention";
     for (size_t i = 0; i < feature_dim_keys_.size(); i++) {
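
Note: the new device_task_keys_/device_task_ptr_ buffers are shard-major, device-minor: they are sized once to shard_num x device_num at init time and cleared in place on Reset() so the shape survives reuse. A minimal sketch of that layout, with illustrative names:

#include <cstddef>
#include <cstdint>
#include <vector>

struct DeviceTaskBuffers {
  // Outer index: shard; inner index: device (as in HeterContext).
  std::vector<std::vector<std::vector<std::uint64_t>>> keys;

  void Init(std::size_t shard_num, std::size_t device_num) {
    keys.resize(shard_num);
    for (auto& shard : keys) shard.resize(device_num);  // one list per device
  }

  void Reset() {
    // Clear contents but keep the shard x device shape.
    for (auto& shard : keys)
      for (auto& dev : shard) dev.clear();
  }
};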