Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions paddle/fluid/operators/save_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,23 @@ PD_REGISTER_KERNEL(save,
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}

#ifdef PADDLE_WITH_XPU
// Registers ops::SaveKernel as the `save` kernel for the XPU backend (all
// layouts) for the element types listed below. The registration is compiled
// only when PADDLE_WITH_XPU is defined.
PD_REGISTER_KERNEL(save,
XPU,
ALL_LAYOUT,
ops::SaveKernel,
float,
double,
int,
uint8_t,
int8_t,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {
// Mark input 0 as accepting ALL_BACKEND, mirroring the registration that
// precedes this one in the file.
// NOTE(review): presumably this lets the tensor to be saved live on any
// device without a forced transfer -- confirm against the kernel dispatch.
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}
#endif

PD_REGISTER_KERNEL(save_sr,
CPU,
ALL_LAYOUT,
Expand Down
9 changes: 9 additions & 0 deletions paddle/phi/backends/xpu/xpu2_op_list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,15 @@ XPUOpMap& get_kl2_ops() {
{"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"save",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::FLOAT64,
phi::DataType::INT32,
phi::DataType::UINT8,
phi::DataType::INT8,
phi::DataType::INT64,
phi::DataType::FLOAT16,
phi::DataType::BFLOAT16})},
{"scale",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::FLOAT16,
Expand Down
9 changes: 9 additions & 0 deletions paddle/phi/backends/xpu/xpu3_op_list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,15 @@ XPUOpMap& get_kl3_ops() {
{"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"save",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::FLOAT64,
phi::DataType::INT32,
phi::DataType::UINT8,
phi::DataType::INT8,
phi::DataType::INT64,
phi::DataType::FLOAT16,
phi::DataType::BFLOAT16})},
{"scale",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::FLOAT16,
Expand Down
3 changes: 3 additions & 0 deletions test/cpp/fluid/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ paddle_test(assign_op_test SRCS assign_op_test.cc)
paddle_test(scatter_test SRCS scatter_test.cc DEPS common)
paddle_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc)
paddle_test(save_load_op_test SRCS save_load_op_test.cc)
# Build the XPU variant of the save/load operator test only when WITH_XPU is
# enabled.
if(WITH_XPU)
paddle_test(save_load_op_test_xpu SRCS save_load_op_test_xpu.cc)
endif()
paddle_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc)
if(WITH_CINN)
set(CINN_DEPS python)
Expand Down
123 changes: 123 additions & 0 deletions test/cpp/fluid/save_load_op_test_xpu.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/core/kernel_registry.h"

// Round-trips a 2-D tensor through the `save` and `load` operators on `place`
// and checks that the data and the LoD read back match what was written.
//
// Template parameters:
//   Place - place type on which the tensor is allocated and both ops are run.
//   T     - element type of the tensor.
// Parameters:
//   place - place instance used for the allocation and passed to both ops.
//   dim_1 - first tensor dimension; also the number of LoD offsets recorded.
//   dim_2 - second tensor dimension.
// Returns 0 when the loaded data and LoD equal the originals, 1 otherwise.
// Side effect: the `save` op is given the relative file path "tensor.save".
template <typename Place, typename T>
int SaveLoadOpTest(Place place, int dim_1, int dim_2) {
  const int numel = dim_1 * dim_2;

  // use cpu place for ground truth
  paddle::platform::CPUPlace cpu_place;
  std::vector<T> ground_truth_cpu(numel);
  for (int i = 0; i < numel; i++) {
    ground_truth_cpu[i] = static_cast<T>(i);
  }

  // scope, var, tensor and lod
  paddle::framework::Scope scope;
  auto var = scope.Var("test_var");
  auto tensor = var->GetMutable<phi::DenseTensor>();
  tensor->Resize({dim_1, dim_2});
  paddle::framework::LoD expect_lod;
  expect_lod.resize(1);
  for (int i = 0; i < dim_1; i++) {
    expect_lod[0].push_back(i);
  }
  tensor->set_lod(expect_lod);
  T* src_mutable = tensor->mutable_data<T>(place);
  // copy cpu data to tensor
  paddle::memory::Copy(place,
                       src_mutable,
                       cpu_place,
                       ground_truth_cpu.data(),
                       sizeof(T) * ground_truth_cpu.size());

  // run save op
  paddle::framework::AttributeMap attrs;
  attrs.insert({"file_path", std::string("tensor.save")});
  auto save_op = paddle::framework::OpRegistry::CreateOp(
      "save", {{"X", {"test_var"}}}, {}, attrs);
  save_op->Run(scope, place);

  // result var and tensor
  auto load_var = scope.Var("out_var");
  auto target = load_var->GetMutable<phi::DenseTensor>();

  // run load op (reuses the same attributes, hence the same file path)
  auto load_op = paddle::framework::OpRegistry::CreateOp(
      "load", {}, {{"Out", {"out_var"}}}, attrs);
  load_op->Run(scope, place);

  // copy result tensor data to cpu
  T* actual = target->data<T>();
  std::vector<T> actual_cpu(numel);
  paddle::memory::Copy(cpu_place,
                       actual_cpu.data(),
                       place,
                       actual,
                       sizeof(T) * ground_truth_cpu.size());

  // check result: data
  for (int i = 0; i < numel; i++) {
    if (actual_cpu[i] != ground_truth_cpu[i]) {
      return 1;
    }
  }

  // check result: lod
  const auto& actual_lod = target->lod();
  if (expect_lod.size() != actual_lod.size()) {
    return 1;
  }
  for (size_t i = 0; i < expect_lod.size(); ++i) {  // NOLINT
    // Each level must have the same length before it is compared element by
    // element; otherwise a shorter loaded level would be indexed out of
    // bounds and a longer one would go unnoticed.
    if (expect_lod[i].size() != actual_lod[i].size()) {
      return 1;
    }
    for (size_t j = 0; j < expect_lod[i].size(); ++j) {
      if (expect_lod[i][j] != actual_lod[i][j]) {
        return 1;
      }
    }
  }
  return 0;
}

// Runs the save/load round trip on XPU device 0 and on CPU for float, int,
// float16 and bfloat16 tensors. Every run is expected to return 0.
TEST(SaveLoadOp, XPU) {
  using XpuPlace = paddle::platform::XPUPlace;
  using CpuPlace = paddle::platform::CPUPlace;
  using Float16 = paddle::platform::float16;
  using BFloat16 = paddle::platform::bfloat16;

  XpuPlace xpu_place(0);
  CpuPlace cpu_place;

  // The extra parentheses keep the commas in the template argument lists
  // from being parsed as macro argument separators.

  // float, 3 x 10
  EXPECT_EQ((SaveLoadOpTest<XpuPlace, float>(xpu_place, 3, 10)), 0);
  EXPECT_EQ((SaveLoadOpTest<CpuPlace, float>(cpu_place, 3, 10)), 0);

  // int, 2 x 128
  EXPECT_EQ((SaveLoadOpTest<XpuPlace, int>(xpu_place, 2, 128)), 0);
  EXPECT_EQ((SaveLoadOpTest<CpuPlace, int>(cpu_place, 2, 128)), 0);

  // float16, 2 x 128
  EXPECT_EQ((SaveLoadOpTest<XpuPlace, Float16>(xpu_place, 2, 128)), 0);
  EXPECT_EQ((SaveLoadOpTest<CpuPlace, Float16>(cpu_place, 2, 128)), 0);

  // bfloat16, 4 x 32
  EXPECT_EQ((SaveLoadOpTest<XpuPlace, BFloat16>(xpu_place, 4, 32)), 0);
  EXPECT_EQ((SaveLoadOpTest<CpuPlace, BFloat16>(cpu_place, 4, 32)), 0);
}