diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc
index f025d278074215..8b0f0eb45ffa5c 100644
--- a/paddle/fluid/operators/save_op.cc
+++ b/paddle/fluid/operators/save_op.cc
@@ -105,6 +105,23 @@ PD_REGISTER_KERNEL(save,
   kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
 }
 
+#ifdef PADDLE_WITH_XPU
+PD_REGISTER_KERNEL(save,
+                   XPU,
+                   ALL_LAYOUT,
+                   ops::SaveKernel,
+                   float,
+                   double,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {
+  kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
+}
+#endif
+
 PD_REGISTER_KERNEL(save_sr,
                    CPU,
                    ALL_LAYOUT,
diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index 5a371aa14116ed..a5681c7eaeef19 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -822,6 +822,15 @@ XPUOpMap& get_kl2_ops() {
       {"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})},
       {"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})},
       {"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"save",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT64,
+                     phi::DataType::INT32,
+                     phi::DataType::UINT8,
+                     phi::DataType::INT8,
+                     phi::DataType::INT64,
+                     phi::DataType::FLOAT16,
+                     phi::DataType::BFLOAT16})},
       {"scale",
        XPUKernelSet({phi::DataType::FLOAT32,
                      phi::DataType::FLOAT16,
diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc
index 54f56f2bd93613..e486bf9b224f04 100644
--- a/paddle/phi/backends/xpu/xpu3_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -828,6 +828,15 @@ XPUOpMap& get_kl3_ops() {
       {"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})},
       {"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})},
       {"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"save",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT64,
+                     phi::DataType::INT32,
+                     phi::DataType::UINT8,
+                     phi::DataType::INT8,
+                     phi::DataType::INT64,
+                     phi::DataType::FLOAT16,
+                     phi::DataType::BFLOAT16})},
       {"scale",
        XPUKernelSet({phi::DataType::FLOAT32,
                      phi::DataType::FLOAT16,
diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt
index 17d71d85c0d00a..a6b6ce43dfb7e3 100644
--- a/test/cpp/fluid/CMakeLists.txt
+++ b/test/cpp/fluid/CMakeLists.txt
@@ -28,6 +28,9 @@ paddle_test(assign_op_test SRCS assign_op_test.cc)
 paddle_test(scatter_test SRCS scatter_test.cc DEPS common)
 paddle_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc)
 paddle_test(save_load_op_test SRCS save_load_op_test.cc)
+if(WITH_XPU)
+  paddle_test(save_load_op_test_xpu SRCS save_load_op_test_xpu.cc)
+endif()
 paddle_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc)
 if(WITH_CINN)
   set(CINN_DEPS python)
diff --git a/test/cpp/fluid/save_load_op_test_xpu.cc b/test/cpp/fluid/save_load_op_test_xpu.cc
new file mode 100644
index 00000000000000..9541889c7e0c10
--- /dev/null
+++ b/test/cpp/fluid/save_load_op_test_xpu.cc
@@ -0,0 +1,123 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+template <typename T, typename Place>
+int SaveLoadOpTest(Place place, int dim_1, int dim_2) {
+  // use cpu place for ground truth
+  paddle::platform::CPUPlace cpu_place;
+  std::vector<T> ground_truth_cpu(dim_1 * dim_2);
+  for (int i = 0; i < dim_1 * dim_2; i++) {
+    ground_truth_cpu[i] = static_cast<T>(i);
+  }
+
+  // scope, var, tensor and lod
+  paddle::framework::Scope scope;
+  auto var = scope.Var("test_var");
+  auto tensor = var->GetMutable<phi::DenseTensor>();
+  tensor->Resize({dim_1, dim_2});
+  paddle::framework::LoD expect_lod;
+  expect_lod.resize(1);
+  for (int i = 0; i < dim_1; i++) {
+    expect_lod[0].push_back(i);
+  }
+  tensor->set_lod(expect_lod);
+  T* src_mutable = tensor->mutable_data<T>(place);
+  // copy cpu data to tensor
+  paddle::memory::Copy(place,
+                       src_mutable,
+                       cpu_place,
+                       ground_truth_cpu.data(),
+                       sizeof(T) * ground_truth_cpu.size());
+
+  // run save op
+  paddle::framework::AttributeMap attrs;
+  attrs.insert({"file_path", std::string("tensor.save")});
+  auto save_op = paddle::framework::OpRegistry::CreateOp(
+      "save", {{"X", {"test_var"}}}, {}, attrs);
+  save_op->Run(scope, place);
+
+  // result var and tensor
+  auto load_var = scope.Var("out_var");
+  auto target = load_var->GetMutable<phi::DenseTensor>();
+
+  // run load op
+  auto load_op = paddle::framework::OpRegistry::CreateOp(
+      "load", {}, {{"Out", {"out_var"}}}, attrs);
+  load_op->Run(scope, place);
+
+  // copy result tensor data to cpu
+  T* actual = target->data<T>();
+  std::vector<T> actual_cpu(dim_1 * dim_2);
+  paddle::memory::Copy(cpu_place,
+                       actual_cpu.data(),
+                       place,
+                       actual,
+                       sizeof(T) * ground_truth_cpu.size());
+
+  // check result: data
+  for (int i = 0; i < dim_1 * dim_2; i++) {
+    if (actual_cpu[i] != ground_truth_cpu[i]) {
+      return 1;
+    }
+  }
+
+  // check result: lod
+  auto& actual_lod = target->lod();
+  if (expect_lod.size() != actual_lod.size()) {
+    return 1;
+  }
+  for (size_t i = 0; i < expect_lod.size(); ++i) {  // NOLINT
+    for (size_t j = 0; j < expect_lod[i].size(); ++j) {
+      if (expect_lod[i][j] != actual_lod[i][j]) {
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+TEST(SaveLoadOp, XPU) {
+  paddle::platform::XPUPlace xpu_place(0);
+  paddle::platform::CPUPlace cpu_place;
+  int r = 0;
+
+  r = SaveLoadOpTest<float, paddle::platform::XPUPlace>(xpu_place, 3, 10);
+  EXPECT_EQ(r, 0);
+  r = SaveLoadOpTest<float, paddle::platform::CPUPlace>(cpu_place, 3, 10);
+  EXPECT_EQ(r, 0);
+
+  r = SaveLoadOpTest<int, paddle::platform::XPUPlace>(xpu_place, 2, 128);
+  EXPECT_EQ(r, 0);
+  r = SaveLoadOpTest<int, paddle::platform::CPUPlace>(cpu_place, 2, 128);
+  EXPECT_EQ(r, 0);
+
+  r = SaveLoadOpTest<paddle::platform::float16, paddle::platform::XPUPlace>(
+      xpu_place, 2, 128);
+  EXPECT_EQ(r, 0);
+  r = SaveLoadOpTest<paddle::platform::float16, paddle::platform::CPUPlace>(
+      cpu_place, 2, 128);
+  EXPECT_EQ(r, 0);
+
+  r = SaveLoadOpTest<phi::dtype::bfloat16, paddle::platform::XPUPlace>(
+      xpu_place, 4, 32);
+  EXPECT_EQ(r, 0);
+  r = SaveLoadOpTest<phi::dtype::bfloat16, paddle::platform::CPUPlace>(
+      cpu_place, 4, 32);
+  EXPECT_EQ(r, 0);
+}