From 021197ed2179d6f33c394155dfb0fb044edc1847 Mon Sep 17 00:00:00 2001 From: tc20042008 <156998525+tc20042008@users.noreply.github.com> Date: Thu, 4 Jul 2024 10:07:44 +0800 Subject: [PATCH 01/16] [CINN] Dump pir program only once (#65681) * dump FeedOp tensor meta * dump pir program only once --------- Co-authored-by: jiahy0825 --- .../transforms/pir_to_py_code_converter.cc | 90 ++++++++++++------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc b/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc index 8ad51581c1a740..473f1c9de1b485 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc @@ -32,6 +32,7 @@ #include "paddle/fluid/framework/feed_hook.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_attribute.h" +#include "paddle/fluid/pir/dialect/kernel/ir/kernel_op.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/dialect/operator/utils/utils.h" @@ -75,23 +76,34 @@ void VisitFeedName(const pir::Program& program, const DoEachFeadNameT& DoEachFeadName) { auto module_op = program.module_op(); const auto& block = module_op.block(); - const auto& IsDataOp = [](const pir::Operation& op) -> bool { - return op.isa(); - }; - const auto& GetDataOpName = [](const pir::Operation& op) -> std::string { + auto GetDataOpName = + [](const pir::Operation& op) -> std::optional { + if (!op.isa()) return std::nullopt; return op.attributes().at("name").dyn_cast().AsString(); }; - const auto& IsFeedOp = [](const pir::Operation& op) -> bool { - return op.isa(); + auto GetFeedOpName = + [](const pir::Operation& op) -> std::optional { + if (!op.isa()) return std::nullopt; + return op.attributes().at("name").dyn_cast().AsString(); }; - const auto& GetFeedOpName = [](const pir::Operation& op) -> std::string { + auto GetPhiFeedOpName = + [](const pir::Operation& op) -> std::optional { + if (!op.isa()) return std::nullopt; + const auto& attributes = op.attributes(); + const auto& op_name_it = attributes.find("op_name"); + if (op_name_it == attributes.end()) return std::nullopt; + const auto& op_name = + op_name_it->second.dyn_cast().AsString(); + if (op_name != "pd_op.feed") return std::nullopt; return op.attributes().at("name").dyn_cast().AsString(); }; for (const auto& op : block) { - if (IsDataOp(op)) { - DoEachFeadName(GetDataOpName(op)); - } else if (IsFeedOp(op)) { - DoEachFeadName(GetFeedOpName(op)); + if (const auto& name = GetDataOpName(op)) { + DoEachFeadName(name.value()); + } else if (const auto& name = GetFeedOpName(op)) { + DoEachFeadName(name.value()); + } else if (const auto& name = GetPhiFeedOpName(op)) { + DoEachFeadName(name.value()); } else { // Do nothing. 
} @@ -1431,34 +1443,48 @@ std::optional GetNullShapeAnalysis( return std::nullopt; } +void TryTruncateLogginFile(const std::string& file_path) { + if (!FLAGS_logging_trunc_pir_py_code) return; + static std::mutex mutex; + std::unique_lock lock(mutex); + static std::unordered_map once_flags; + std::call_once(once_flags[file_path], [&] { + std::ofstream ofs; + ofs.open(file_path.c_str(), std::ios::out | std::ios::trunc); + ofs.close(); + }); +} + } // namespace void PirToPyCodeConverter::SaveIfFlagEnabled() const { if (program_ == nullptr) return; if (file_name_.empty()) return; - if (FLAGS_logging_pir_py_code_dir == "") return; + if (FLAGS_logging_pir_py_code_dir.empty()) return; const std::string file_path = FLAGS_logging_pir_py_code_dir + "/" + file_name_; - ShapeAnalysisGetterT ShapeAnalysisGetter = - (dump_symbolic_shape_ ? GetShapeAnalysisFromManager - : GetNullShapeAnalysis); - PirToPyCodeConverterHelper converter_helper(program_, ShapeAnalysisGetter); - const std::string content = converter_helper.Convert(); - static std::mutex mutex; - std::unique_lock lock(mutex); - if (FLAGS_logging_trunc_pir_py_code) { - static std::unordered_map once_flags; - std::call_once(once_flags[file_path], [&] { - std::ofstream ofs; - ofs.open(file_path.c_str(), std::ios::out | std::ios::trunc); - ofs.close(); - }); - } - std::ofstream ofs; - ofs.open(file_path.c_str(), std::ios::out | std::ios::app); - if (!ofs.is_open()) return; - ofs << content << std::endl; - ofs.close(); + TryTruncateLogginFile(file_path); + const auto MutOnceFlag = [&]() -> std::once_flag* { + static std::mutex mutex; + std::unique_lock lock(mutex); + using FileName = std::string; + using FileName2OnceFlag = std::unordered_map; + using ProgramId = int64_t; + static std::unordered_map once_flags; + return &once_flags[program_->id()][file_name_]; + }; + std::call_once(*MutOnceFlag(), [&] { + ShapeAnalysisGetterT ShapeAnalysisGetter = + (dump_symbolic_shape_ ? 
GetShapeAnalysisFromManager + : GetNullShapeAnalysis); + PirToPyCodeConverterHelper converter_helper(program_, ShapeAnalysisGetter); + const std::string content = converter_helper.Convert(); + std::ofstream ofs; + ofs.open(file_path.c_str(), std::ios::out | std::ios::app); + if (!ofs.is_open()) return; + ofs << content << std::endl; + ofs.close(); + }); } void DumpExecProgram(const pir::Program& program, From 5b2e91444539c39eaa921433ae976a00dce2d408 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Thu, 4 Jul 2024 10:41:36 +0800 Subject: [PATCH 02/16] Store allocation ptr in vector (#65024) Co-authored-by: lawrence910426 --- paddle/phi/kernels/funcs/segmented_array.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/paddle/phi/kernels/funcs/segmented_array.h b/paddle/phi/kernels/funcs/segmented_array.h index 4b4b1b59db66eb..24046da52aeeeb 100644 --- a/paddle/phi/kernels/funcs/segmented_array.h +++ b/paddle/phi/kernels/funcs/segmented_array.h @@ -112,7 +112,7 @@ struct ArraySetterBase { void* src, size_t num_bytes, bool use_cuda_graph = false) { - allocation = phi::memory_utils::Alloc( + auto allocation = phi::memory_utils::Alloc( ctx.GetPlace(), num_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); @@ -129,10 +129,13 @@ struct ArraySetterBase { num_bytes, phi::gpuMemcpyHostToDevice, ctx.stream()); - return allocation->ptr(); + + auto ptr = allocation->ptr(); + allocations.emplace_back(std::move(allocation)); + return ptr; } - phi::Allocator::AllocationPtr allocation{nullptr}; + std::vector allocations; }; template From f461d862eee70f824243ef45ed998166f455ba71 Mon Sep 17 00:00:00 2001 From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com> Date: Thu, 4 Jul 2024 11:30:02 +0800 Subject: [PATCH 03/16] [XPU] mean_grad support bf16 for XPU (#65684) --- paddle/phi/backends/xpu/xpu3_op_list.cc | 4 +++- paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc index 54f56f2bd93613..e27587c8596f02 100644 --- a/paddle/phi/backends/xpu/xpu3_op_list.cc +++ b/paddle/phi/backends/xpu/xpu3_op_list.cc @@ -668,7 +668,9 @@ XPUOpMap& get_kl3_ops() { phi::DataType::BFLOAT16, phi::DataType::FLOAT16})}, {"mean_grad", - XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT16, + phi::DataType::BFLOAT16})}, {"mean", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, diff --git a/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc b/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc index 37ace904b2b807..de5b4718e98603 100644 --- a/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc @@ -89,4 +89,5 @@ PD_REGISTER_KERNEL(mean_grad, ALL_LAYOUT, phi::ReduceMeanGradKernel, float, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} From a30c8a5ab063e3e0521267fb7610823681adf7b0 Mon Sep 17 00:00:00 2001 From: Frank Lin Date: Thu, 4 Jul 2024 11:45:06 +0800 Subject: [PATCH 04/16] Disabling Unrelated Tests When Enabling CUDA Async Allocator in CI (#65094) * Either stream safe or async allocator * Ignore if not enabled * fix: ignore cuda managed * fix: disable async allocator * fix: either async or stream safe * fix useless if --------- Co-authored-by: lawrence910426 --- .../garbage_collector/garbage_collector.h | 13 +++++++++- .../memory/allocation/allocator_facade.cc | 10 +++++++ 
test/cpp/fluid/memory/CMakeLists.txt | 5 ++-- .../memory/stream_safe_cuda_alloc_test.cu | 26 +++++++++++++++++++ .../test_auto_growth_allocator_gpu.py | 2 ++ 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h index 9c9b40631eaa93..6208130a67ca75 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h +++ b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h @@ -53,13 +53,24 @@ inline bool IsInterpretercoreFastGCEnabled() { // When using cuda graph, fast GC must be used. Because // `EventQuery` method in event GC cannot be used in // cuda graph. + PADDLE_ENFORCE_EQ(memory::allocation::AllocatorFacade::Instance() + .IsStreamSafeCUDAAllocatorUsed() == true && + memory::allocation::AllocatorFacade::Instance() + .IsCUDAMallocAsyncAllocatorUsed() == true, + false, + platform::errors::InvalidArgument( + "StreamSafeAllocator and AsyncAllocator shouldn't be " + "True together.")); PADDLE_ENFORCE_EQ(memory::allocation::AllocatorFacade::Instance() .IsStreamSafeCUDAAllocatorUsed() == false && + memory::allocation::AllocatorFacade::Instance() + .IsCUDAMallocAsyncAllocatorUsed() == false && FLAGS_new_executor_use_cuda_graph, false, platform::errors::InvalidArgument( "When FLAGS_new_executor_use_cuda_graph is true, " - "IsStreamSafeCUDAAllocatorUsed must be true, but " + "Either IsStreamSafeCUDAAllocatorUsed or " + "IsCUDAMallocAsyncAllocatorUsed must be true, but " "got false.")); return (memory::allocation::AllocatorFacade::Instance() .IsStreamSafeCUDAAllocatorUsed() && diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index eef6c1a1e8c4ac..b81bfd0400d99f 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -264,6 +264,11 @@ class AllocatorFacadePrivate { // application, treating it separately can avoid lots of overhead of // acquiring default stream and applying read-write lock. 
if (FLAGS_use_cuda_malloc_async_allocator) { + PADDLE_ENFORCE_EQ(FLAGS_use_cuda_managed_memory, + false, + platform::errors::InvalidArgument( + "Async allocator cannot be used with CUDA " + "managed memory.")); WrapCUDAMallocAsyncAllocatorForDefault(); is_cuda_malloc_async_allocator_used_ = true; } else { @@ -871,6 +876,11 @@ class AllocatorFacadePrivate { "the allocator strategy %d is unsupported for multi-stream", static_cast(strategy_))); if (FLAGS_use_cuda_malloc_async_allocator) { + PADDLE_ENFORCE_EQ( + FLAGS_use_cuda_managed_memory, + false, + platform::errors::InvalidArgument( + "Async allocator cannot be used with CUDA managed memory.")); VLOG(8) << "[CUDAMallocAsyncAllocator] Init CUDA allocator for stream " << stream << " in place " << p; InitCUDAMallocAsyncAllocator(p, stream); diff --git a/test/cpp/fluid/memory/CMakeLists.txt b/test/cpp/fluid/memory/CMakeLists.txt index 5bb36f73982287..a7c2e6df4411c6 100644 --- a/test/cpp/fluid/memory/CMakeLists.txt +++ b/test/cpp/fluid/memory/CMakeLists.txt @@ -116,8 +116,9 @@ if(WITH_TESTING AND TEST cuda_managed_memory_test) cuda_managed_memory_test PROPERTIES ENVIRONMENT - "FLAGS_use_cuda_managed_memory=true;FLAGS_allocator_strategy=auto_growth" - TIMEOUT 50) + "FLAGS_use_cuda_managed_memory=true;FLAGS_use_cuda_malloc_async_allocator=false;FLAGS_allocator_strategy=auto_growth" + TIMEOUT + 50) endif() if(WITH_GPU AND WITH_TESTING) diff --git a/test/cpp/fluid/memory/stream_safe_cuda_alloc_test.cu b/test/cpp/fluid/memory/stream_safe_cuda_alloc_test.cu index b0bebf5202eee2..91e896c803bec0 100644 --- a/test/cpp/fluid/memory/stream_safe_cuda_alloc_test.cu +++ b/test/cpp/fluid/memory/stream_safe_cuda_alloc_test.cu @@ -33,6 +33,14 @@ #include #endif +#define RETURN_IF_NOT_ENABLED \ + { \ + if (!memory::allocation::AllocatorFacade::Instance() \ + .IsStreamSafeCUDAAllocatorUsed()) { \ + return; \ + } \ + } + namespace paddle { namespace memory { @@ -54,6 +62,8 @@ void CheckMemLeak(const platform::CUDAPlace &place) { } TEST(StreamSafeCUDAAllocInterfaceTest, AllocInterfaceTest) { + RETURN_IF_NOT_ENABLED; + platform::CUDAPlace place = platform::CUDAPlace(); size_t alloc_size = 256; @@ -81,6 +91,8 @@ TEST(StreamSafeCUDAAllocInterfaceTest, AllocInterfaceTest) { } TEST(StreamSafeCUDAAllocInterfaceTest, GetAllocatorInterfaceTest) { + RETURN_IF_NOT_ENABLED; + platform::CUDAPlace place = platform::CUDAPlace(); size_t alloc_size = 256; @@ -104,6 +116,8 @@ TEST(StreamSafeCUDAAllocInterfaceTest, GetAllocatorInterfaceTest) { } TEST(StreamSafeCUDAAllocInterfaceTest, GetAllocatorWithDefaultStreamTest) { + RETURN_IF_NOT_ENABLED; + auto &instance = allocation::AllocatorFacade::Instance(); platform::CUDAPlace place = platform::CUDAPlace(); const std::shared_ptr allocator_implicit_stream = @@ -118,6 +132,8 @@ TEST(StreamSafeCUDAAllocInterfaceTest, GetAllocatorWithDefaultStreamTest) { } TEST(StreamSafeCUDAAllocInterfaceTest, ZeroSizeRecordStreamTest) { + RETURN_IF_NOT_ENABLED; + platform::CUDAPlace place = platform::CUDAPlace(); std::shared_ptr zero_size_allocation = AllocShared(place, 0); EXPECT_EQ(zero_size_allocation->ptr(), nullptr); @@ -139,6 +155,8 @@ TEST(StreamSafeCUDAAllocInterfaceTest, ZeroSizeRecordStreamTest) { } TEST(StreamSafeCUDAAllocInterfaceTest, GetStreamInterfaceTest) { + RETURN_IF_NOT_ENABLED; + platform::CUDAPlace place = platform::CUDAPlace(); size_t alloc_size = 256; @@ -176,6 +194,8 @@ TEST(StreamSafeCUDAAllocInterfaceTest, GetStreamInterfaceTest) { } TEST(StreamSafeCUDAAllocRetryTest, RetryTest) { + RETURN_IF_NOT_ENABLED; + platform::CUDAPlace 
place = platform::CUDAPlace(); gpuStream_t stream1, stream2; #ifdef PADDLE_WITH_CUDA @@ -403,17 +423,23 @@ class StreamSafeCUDAAllocTest : public ::testing::Test { }; TEST_F(StreamSafeCUDAAllocTest, CUDAMutilStreamTest) { + RETURN_IF_NOT_ENABLED; + MultiStreamRun(); CheckResult(); } TEST_F(StreamSafeCUDAAllocTest, CUDAMutilThreadMutilStreamTest) { + RETURN_IF_NOT_ENABLED; + MultiThreadMultiStreamRun(); CheckResult(); } #if (defined(PADDLE_WITH_CUDA) && (CUDA_VERSION >= 11000)) TEST_F(StreamSafeCUDAAllocTest, CUDAGraphTest) { + RETURN_IF_NOT_ENABLED; + MultiStreamRun(); CUDAGraphRun(); CheckResult(); diff --git a/test/legacy_test/test_auto_growth_allocator_gpu.py b/test/legacy_test/test_auto_growth_allocator_gpu.py index 3ac11c1baf86fb..c20c825032d6ac 100644 --- a/test/legacy_test/test_auto_growth_allocator_gpu.py +++ b/test/legacy_test/test_auto_growth_allocator_gpu.py @@ -25,6 +25,8 @@ { 'FLAGS_allocator_strategy': 'auto_growth', 'FLAGS_auto_growth_chunk_size_in_mb': 10, + # Async allocator does not support auto growth allocator. + 'FLAGS_use_cuda_malloc_async_allocator': 0, } ) From 216bfcc1b6747d1add85a4c09e25a2452c66d6ef Mon Sep 17 00:00:00 2001 From: houj04 <35131887+houj04@users.noreply.github.com> Date: Thu, 4 Jul 2024 13:51:37 +0800 Subject: [PATCH 05/16] [XPU] ut for save and load op (#65656) --- paddle/fluid/operators/save_op.cc | 17 ++++ paddle/phi/backends/xpu/xpu2_op_list.cc | 9 ++ paddle/phi/backends/xpu/xpu3_op_list.cc | 9 ++ test/cpp/fluid/CMakeLists.txt | 3 + test/cpp/fluid/save_load_op_test_xpu.cc | 123 ++++++++++++++++++++++++ 5 files changed, 161 insertions(+) create mode 100644 test/cpp/fluid/save_load_op_test_xpu.cc diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index f025d278074215..8b0f0eb45ffa5c 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -105,6 +105,23 @@ PD_REGISTER_KERNEL(save, kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); } +#ifdef PADDLE_WITH_XPU +PD_REGISTER_KERNEL(save, + XPU, + ALL_LAYOUT, + ops::SaveKernel, + float, + double, + int, + uint8_t, + int8_t, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) { + kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); +} +#endif + PD_REGISTER_KERNEL(save_sr, CPU, ALL_LAYOUT, diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 5a371aa14116ed..a5681c7eaeef19 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -822,6 +822,15 @@ XPUOpMap& get_kl2_ops() { {"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})}, {"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})}, + {"save", + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT64, + phi::DataType::INT32, + phi::DataType::UINT8, + phi::DataType::INT8, + phi::DataType::INT64, + phi::DataType::FLOAT16, + phi::DataType::BFLOAT16})}, {"scale", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc index e27587c8596f02..f3abe1726a5053 100644 --- a/paddle/phi/backends/xpu/xpu3_op_list.cc +++ b/paddle/phi/backends/xpu/xpu3_op_list.cc @@ -830,6 +830,15 @@ XPUOpMap& get_kl3_ops() { {"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})}, {"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})}, + {"save", + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT64, + 
phi::DataType::INT32, + phi::DataType::UINT8, + phi::DataType::INT8, + phi::DataType::INT64, + phi::DataType::FLOAT16, + phi::DataType::BFLOAT16})}, {"scale", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt index 17d71d85c0d00a..a6b6ce43dfb7e3 100644 --- a/test/cpp/fluid/CMakeLists.txt +++ b/test/cpp/fluid/CMakeLists.txt @@ -28,6 +28,9 @@ paddle_test(assign_op_test SRCS assign_op_test.cc) paddle_test(scatter_test SRCS scatter_test.cc DEPS common) paddle_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc) paddle_test(save_load_op_test SRCS save_load_op_test.cc) +if(WITH_XPU) + paddle_test(save_load_op_test_xpu SRCS save_load_op_test_xpu.cc) +endif() paddle_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc) if(WITH_CINN) set(CINN_DEPS python) diff --git a/test/cpp/fluid/save_load_op_test_xpu.cc b/test/cpp/fluid/save_load_op_test_xpu.cc new file mode 100644 index 00000000000000..9541889c7e0c10 --- /dev/null +++ b/test/cpp/fluid/save_load_op_test_xpu.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/core/kernel_registry.h" + +template +int SaveLoadOpTest(Place place, int dim_1, int dim_2) { + // use cpu place for ground truth + paddle::platform::CPUPlace cpu_place; + std::vector ground_truth_cpu(dim_1 * dim_2); + for (int i = 0; i < dim_1 * dim_2; i++) { + ground_truth_cpu[i] = static_cast(i); + } + + // scope, var, tensor and lod + paddle::framework::Scope scope; + auto var = scope.Var("test_var"); + auto tensor = var->GetMutable(); + tensor->Resize({dim_1, dim_2}); + paddle::framework::LoD expect_lod; + expect_lod.resize(1); + for (int i = 0; i < dim_1; i++) { + expect_lod[0].push_back(i); + } + tensor->set_lod(expect_lod); + T* src_mutable = tensor->mutable_data(place); + // copy cpu data to tensor + paddle::memory::Copy(place, + src_mutable, + cpu_place, + ground_truth_cpu.data(), + sizeof(T) * ground_truth_cpu.size()); + + // run save op + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string("tensor.save")}); + auto save_op = paddle::framework::OpRegistry::CreateOp( + "save", {{"X", {"test_var"}}}, {}, attrs); + save_op->Run(scope, place); + + // result var and tensor + auto load_var = scope.Var("out_var"); + auto target = load_var->GetMutable(); + + // run load op + auto load_op = paddle::framework::OpRegistry::CreateOp( + "load", {}, {{"Out", {"out_var"}}}, attrs); + load_op->Run(scope, place); + + // copy result tensor data to cpu + T* actual = target->data(); + std::vector actual_cpu(dim_1 * dim_2); + paddle::memory::Copy(cpu_place, + actual_cpu.data(), + place, + actual, + sizeof(T) * ground_truth_cpu.size()); + + // check result: data + for (int i = 0; i < dim_1 * dim_2; i++) { + if (actual_cpu[i] != 
ground_truth_cpu[i]) { + return 1; + } + } + + // check result: lod + auto& actual_lod = target->lod(); + if (expect_lod.size() != actual_lod.size()) { + return 1; + } + for (size_t i = 0; i < expect_lod.size(); ++i) { // NOLINT + for (size_t j = 0; j < expect_lod[i].size(); ++j) { + if (expect_lod[i][j] != actual_lod[i][j]) { + return 1; + } + } + } + return 0; +} + +TEST(SaveLoadOp, XPU) { + paddle::platform::XPUPlace xpu_place(0); + paddle::platform::CPUPlace cpu_place; + int r = 0; + + r = SaveLoadOpTest(xpu_place, 3, 10); + EXPECT_EQ(r, 0); + r = SaveLoadOpTest(cpu_place, 3, 10); + EXPECT_EQ(r, 0); + + r = SaveLoadOpTest(xpu_place, 2, 128); + EXPECT_EQ(r, 0); + r = SaveLoadOpTest(cpu_place, 2, 128); + EXPECT_EQ(r, 0); + + r = SaveLoadOpTest( + xpu_place, 2, 128); + EXPECT_EQ(r, 0); + r = SaveLoadOpTest( + cpu_place, 2, 128); + EXPECT_EQ(r, 0); + + r = SaveLoadOpTest( + xpu_place, 4, 32); + EXPECT_EQ(r, 0); + r = SaveLoadOpTest( + cpu_place, 4, 32); + EXPECT_EQ(r, 0); +} From 5884585f165ace589b2b9051ae317f67c88ca3bf Mon Sep 17 00:00:00 2001 From: Hongqing-work <76149632+Hongqing-work@users.noreply.github.com> Date: Thu, 4 Jul 2024 14:12:56 +0800 Subject: [PATCH 06/16] [CINN]revert of move ShapeOptimization before PdToCinn (#65675) --- paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc index 56db312570df6d..e9c7bbb41c3305 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc @@ -246,8 +246,10 @@ void ApplyCinnPass(::pir::Program* program, .file_name("original_programs.py") .dump_symbolic_shape(FLAGS_logging_pir_py_code_dump_symbolic_dims) .SaveIfFlagEnabled(); - ApplyShapeOptimizationPass(program, CreatePassManager); ApplyPdToCinnPass(program, CreatePassManager); + // TODO(Hongqing-work): move ApplyShapeOptimizationPass before + // ApplyPdToCinnPass after fixing infer shape bug. 
+ ApplyShapeOptimizationPass(program, CreatePassManager); ApplyCinnPreprocessPass(program, CreatePassManager); ApplyBuildGroupOpPass(program, CreatePassManager); PirToPyCodeConverter(program) From 091044b7ddecb851f2a02e680549ac296f1afc64 Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Thu, 4 Jul 2024 14:21:55 +0800 Subject: [PATCH 07/16] inference use FLAGS_enable_pir_api control pir mode (#65596) * inference use FLAGS_enable_pir_api control pir mode * fix ut * fix --- paddle/fluid/inference/api/analysis_predictor.cc | 5 +++++ test/cpp/jit/layer_test.cc | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 948a4b24f29c71..2f43ae37d4d8e6 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -139,6 +139,7 @@ #include "paddle/pir/include/pass/pass_registry.h" COMMON_DECLARE_bool(pir_apply_inplace_pass); +COMMON_DECLARE_bool(enable_pir_api); namespace paddle { namespace { @@ -390,6 +391,10 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config) if (config_.shape_range_info_collected()) { config_.SwitchIrOptim(false); } + if (FLAGS_enable_pir_api) { + config_.EnableNewExecutor(true); + config_.EnableNewIR(true); + } if (config_.new_executor_enabled()) { config_.EnableMemoryOptim(false); if (config_.new_ir_enabled()) { diff --git a/test/cpp/jit/layer_test.cc b/test/cpp/jit/layer_test.cc index 42fd976f6dbdd3..57c7bd9dedfbd9 100644 --- a/test/cpp/jit/layer_test.cc +++ b/test/cpp/jit/layer_test.cc @@ -55,6 +55,8 @@ PD_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT); #endif +COMMON_DECLARE_bool(enable_pir_api); + namespace paddle { namespace jit { using DenseTensor = phi::DenseTensor; @@ -77,6 +79,9 @@ TEST(CpuLayerTest, Function) { } TEST(CpuLayerTest, Construct) { + if (FLAGS_enable_pir_api) { + return; + } auto place = phi::CPUPlace(); std::string path = "./multi_program_load/export"; paddle::platform::Timer timer; @@ -125,6 +130,9 @@ TEST(CpuLayerTest, Construct) { } TEST(CpuLayerTest, Clone) { + if (FLAGS_enable_pir_api) { + return; + } auto place = phi::CPUPlace(); std::string path = "./multi_program_load/export"; @@ -161,6 +169,9 @@ TEST(CpuLayerTest, Clone) { #if defined(PADDLE_WITH_CUDA) TEST(GpuLayerTest, Construct) { + if (FLAGS_enable_pir_api) { + return; + } auto place = phi::GPUPlace(); std::string path = "./multi_program_load/export"; @@ -189,6 +200,9 @@ TEST(GpuLayerTest, Construct) { } TEST(GpuLayerTest, Clone) { + if (FLAGS_enable_pir_api) { + return; + } auto place = phi::GPUPlace(); std::string path = "./multi_program_load/export"; From 218e62d8acf1167bccc2fee1e9097f269961a5e6 Mon Sep 17 00:00:00 2001 From: Botao Zhou <1095497213@qq.com> Date: Thu, 4 Jul 2024 14:30:27 +0800 Subject: [PATCH 08/16] [Hackathon 6th No.28] Enhance paddle.round functionality -part (#64436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add decimals for round * set default value * fix * fix round inplace * add round inplace func * empty * fix round on onednn * fix * remove redundant comments * re-run * change calculation process * fix bug * fix bug * fix bug * fix bug * fix bug * fix bug * fix bug * fix coverage * add attr in yaml file --- paddle/phi/kernels/activation_kernel.h | 7 +++-
paddle/phi/kernels/cpu/activation_kernel.cc | 14 +++++++- paddle/phi/kernels/funcs/activation_functor.h | 33 +++++++++++++++-- paddle/phi/kernels/gpu/activation_kernel.cu | 14 +++++++- .../phi/kernels/onednn/activation_kernel.cc | 26 ++++++++++++-- paddle/phi/ops/yaml/backward.yaml | 2 +- paddle/phi/ops/yaml/op_version.yaml | 8 +++++ paddle/phi/ops/yaml/ops.yaml | 3 +- python/paddle/tensor/ops.py | 22 +++++++++--- test/legacy_test/test_activation_op.py | 36 +++++++++++++++++-- 10 files changed, 150 insertions(+), 15 deletions(-) diff --git a/paddle/phi/kernels/activation_kernel.h b/paddle/phi/kernels/activation_kernel.h index bf3cb325160d36..4e94260bc6d129 100644 --- a/paddle/phi/kernels/activation_kernel.h +++ b/paddle/phi/kernels/activation_kernel.h @@ -68,7 +68,6 @@ DECLARE_ACTIVATION_KERNEL(Log) DECLARE_ACTIVATION_KERNEL(Log2) DECLARE_ACTIVATION_KERNEL(Log10) DECLARE_ACTIVATION_KERNEL(Log1p) -DECLARE_ACTIVATION_KERNEL(Round) DECLARE_ACTIVATION_KERNEL(Floor) DECLARE_ACTIVATION_KERNEL(Ceil) DECLARE_ACTIVATION_KERNEL(Negative) @@ -98,6 +97,12 @@ void Relu6Kernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out); +template +void RoundKernel(const Context& dev_ctx, + const DenseTensor& x, + const int decimals, + DenseTensor* out); + template void SwishKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc index fda8493c9f4523..22b63e6a0ecdf9 100644 --- a/paddle/phi/kernels/cpu/activation_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_kernel.cc @@ -93,7 +93,6 @@ DEFINE_CPU_ACTIVATION_KERNEL(Rsqrt, RsqrtFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Softsign, SoftsignFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Sigmoid, SigmoidFunctor) DEFINE_CPU_ACTIVATION_KERNEL(LogSigmoid, LogSigmoidFunctor) -DEFINE_CPU_ACTIVATION_KERNEL(Round, RoundFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Floor, FloorFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Ceil, CeilFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Negative, NegativeFunctor) @@ -161,6 +160,19 @@ void Relu6Kernel(const Context& dev_ctx, ActivationImpl>( dev_ctx, x, out, functor); } + +template +void RoundKernel(const Context& dev_ctx, + const DenseTensor& x, + const int decimals, + DenseTensor* out) { + funcs::RoundFunctor functor; + auto attrs = functor.GetAttrs(); + *(attrs[0].second) = decimals; + ActivationImpl>( + dev_ctx, x, out, functor); +} + } // namespace phi PD_REGISTER_KERNEL(relu, CPU, ALL_LAYOUT, phi::ReluKernel, float, double) {} diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index 8502c385f7b531..13e6bf0471c2d3 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -2949,9 +2949,23 @@ struct FloorFunctor : public BaseActivationFunctor { // round(x) = [x] template struct RoundFunctor : public BaseActivationFunctor { + int decimals; + + std::vector> GetAttrs() { + return {{"deciamls", &decimals}}; + } + template void operator()(Device d, X x, Out out) const { - out.device(d) = x.round(); + if (decimals == 0) { + out.device(d) = x.round(); + } else if (decimals > 0) { + auto ten_pow_deciamls = static_cast(std::pow(10, decimals)); + out.device(d) = (x * ten_pow_deciamls).round() / ten_pow_deciamls; + } else { + auto ten_pow_deciamls = static_cast(std::pow(10, -decimals)); + out.device(d) = (x / ten_pow_deciamls).round() * ten_pow_deciamls; + } } }; @@ -5161,11 +5175,26 @@ struct CudaFloorFunctor : public BaseActivationFunctor { template struct 
CudaRoundFunctor : public BaseActivationFunctor { using MPType = typename phi::dtype::MPTypeTrait::Type; + int decimals; + std::vector> GetAttrs() { + return {{"deciamls", &decimals}}; + } // round(x) = round(x) __device__ __forceinline__ T operator()(const T arg_x) const { MPType x = static_cast(arg_x); - return static_cast(round(x)); + + if (decimals == 0) { + return static_cast(round(x)); + } else if (decimals > 0) { + float ten_pow_deciamls = powf(10., decimals); + return static_cast(round(x * static_cast(ten_pow_deciamls)) / + ten_pow_deciamls); + } else { + float ten_pow_deciamls = powf(10., -decimals); + return static_cast(round(x / static_cast(ten_pow_deciamls)) * + ten_pow_deciamls); + } } }; diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu index aa874c5e0dd81c..0ad0cb9f8c8f6c 100644 --- a/paddle/phi/kernels/gpu/activation_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_kernel.cu @@ -110,7 +110,6 @@ DEFINE_GPU_ACTIVATION_KERNEL(Rsqrt, CudaRsqrtFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Softsign, CudaSoftsignFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Sigmoid, CudaSigmoidFunctor) DEFINE_GPU_ACTIVATION_KERNEL(LogSigmoid, CudaLogSigmoidFunctor) -DEFINE_GPU_ACTIVATION_KERNEL(Round, CudaRoundFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Floor, CudaFloorFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Ceil, CudaCeilFunctor) @@ -187,6 +186,19 @@ void Relu6Kernel(const Context& dev_ctx, ActivationGPUImpl>( dev_ctx, x, out, functor); } + +template +void RoundKernel(const Context& dev_ctx, + const DenseTensor& x, + const int decimals, + DenseTensor* out) { + funcs::CudaRoundFunctor functor; + auto attrs = functor.GetAttrs(); + *(attrs[0].second) = decimals; + ActivationGPUImpl>( + dev_ctx, x, out, functor); +} + } // namespace phi #ifdef PADDLE_WITH_HIP diff --git a/paddle/phi/kernels/onednn/activation_kernel.cc b/paddle/phi/kernels/onednn/activation_kernel.cc index a4757eab71c41e..247f2df5140d1b 100644 --- a/paddle/phi/kernels/onednn/activation_kernel.cc +++ b/paddle/phi/kernels/onednn/activation_kernel.cc @@ -21,7 +21,6 @@ #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/activation_functor.h" - namespace phi { #define DEFINE_ONEDNN_ACTIVATION_KERNEL(name, functor_class) \ @@ -149,7 +148,30 @@ DEFINE_ONEDNN_ACTIVATION_KERNEL(Sqrt, SqrtOneDNNFunctor) DEFINE_ONEDNN_ACTIVATION_KERNEL(Tanh, TanhOneDNNFunctor) // round eltwise primitive doesn't support BF16, nor does it support grad -DEFINE_ONEDNN_ACTIVATION_KERNEL(Round, RoundOneDNNFunctor) +template +void RoundKernel(const Context& dev_ctx, + const DenseTensor& x, + const int decimals, + DenseTensor* out) { + float ten_pow_deciamls = std::pow(10, decimals); + + DenseTensor out1; + DenseTensorMeta meta_out(x.dtype(), x.dims()); + out1.set_meta(meta_out); + out1.set_lod(x.lod()); + out1.set_mem_desc(x.mem_desc()); + dev_ctx.template Alloc(&out1); + + for (int i = 0; i < x.numel(); i++) { + out1.data()[i] = x.data()[i] * ten_pow_deciamls; + } + RoundOneDNNFunctor functor; + functor(dev_ctx, out1, 0, 0, out); + + for (int i = 0; i < x.numel(); i++) { + out->data()[i] = out->data()[i] * (1 / ten_pow_deciamls); + } +} DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Elu, EluOneDNNFunctor, alpha) DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, ReluOneDNNFunctor, alpha) diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml index ac445e5e486d39..51175fbe6422e9 100644 --- a/paddle/phi/ops/yaml/backward.yaml +++ 
b/paddle/phi/ops/yaml/backward.yaml @@ -2609,7 +2609,7 @@ no_need_buffer : x - backward_op : round_grad - forward : round(Tensor x) -> Tensor(out) + forward : round(Tensor x, int decimals = 0 ) -> Tensor(out) args : (Tensor out_grad) output : Tensor(x_grad) infer_meta : diff --git a/paddle/phi/ops/yaml/op_version.yaml b/paddle/phi/ops/yaml/op_version.yaml index b6081079c4a328..3f4c8c20b414d8 100644 --- a/paddle/phi/ops/yaml/op_version.yaml +++ b/paddle/phi/ops/yaml/op_version.yaml @@ -494,6 +494,14 @@ - add_input : ShiftsTensor comment : The number of places by which the elements of the tensor are shifted. +- op : round + version : + - checkpoint : Add a new attribute [deciamls] to round + action : + - add_attr : decimals + comment : The number of decimal places rounded + default : 0.0 + - op : softmax_with_cross_entropy version : - checkpoint : Add a new attribute [use_softmax] diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index 50ec6fb78a97d7..c76cd20a149747 100755 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3788,10 +3788,11 @@ interfaces : paddle::dialect::InferSymbolicShapeInterface - op : round - args : (Tensor x) + args : (Tensor x, int decimals = 0 ) output : Tensor(out) infer_meta : func : UnchangedInferMeta + param : [x] kernel : func : round inplace : (x -> out) diff --git a/python/paddle/tensor/ops.py b/python/paddle/tensor/ops.py index b28d46fea5aee9..5439b8eb1a4a4c 100644 --- a/python/paddle/tensor/ops.py +++ b/python/paddle/tensor/ops.py @@ -35,7 +35,6 @@ 'rsqrt_', 'ceil_', 'floor_', - 'round_', 'reciprocal_', 'sigmoid_', 'abs_', @@ -687,7 +686,7 @@ def reciprocal(x: Tensor, name: str | None = None) -> Tensor: return out -def round(x: Tensor, name: str | None = None) -> Tensor: +def round(x: Tensor, decimals: int = 0, name: str | None = None) -> Tensor: """ Round the values in the input to the nearest integer value. @@ -704,6 +703,7 @@ def round(x: Tensor, name: str | None = None) -> Tensor: Args: x (Tensor): Input of Round operator, an N-D Tensor, with data type float32, float64 or float16. + decimals(int): Rounded decimal place (default: 0). name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: @@ -721,17 +721,31 @@ def round(x: Tensor, name: str | None = None) -> Tensor: [-1., -0., 1., 2.]) """ if in_dynamic_or_pir_mode(): - return _C_ops.round(x) + return _C_ops.round(x, decimals) else: check_variable_and_dtype( x, 'x', ['float16', 'uint16', 'float32', 'float64'], 'round' ) helper = LayerHelper('round', **locals()) + attrs = { + 'decimals': int(decimals), + } out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op(type='round', inputs={"X": x}, outputs={"Out": out}) + helper.append_op( + type='round', inputs={"X": x}, outputs={"Out": out}, attrs=attrs + ) return out +@inplace_apis_in_dygraph_only +def round_(x, decimals=0, name=None): + r""" + Inplace version of ``round`` API, the output Tensor will be inplaced with input ``x``. + Please refer to :ref:`api_paddle_round`. + """ + return _C_ops.round_(x, decimals) + + def rsqrt(x: Tensor, name: str | None = None) -> Tensor: """ Rsqrt Activation Operator. 
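The ops.py hunk above adds a decimals argument to paddle.round (and an in-place round_ variant); as the functor changes earlier in this patch show, the kernel scales by 10^decimals, rounds, then scales back. A minimal usage sketch of the new parameter follows; the input values and the results noted in the comments are illustrative assumptions, not taken from the patch: a positive decimals keeps that many decimal places, a negative decimals rounds to the left of the decimal point.

import paddle

x = paddle.to_tensor([12.341, 67.897], dtype='float32')
a = paddle.round(x)               # default decimals=0, same behavior as before -> roughly [12., 68.]
b = paddle.round(x, decimals=2)   # keep two decimal places -> roughly [12.34, 67.90]
c = paddle.round(x, decimals=-1)  # round to the nearest ten -> roughly [10., 70.]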
diff --git a/test/legacy_test/test_activation_op.py b/test/legacy_test/test_activation_op.py index 1760f6d2ff2692..fb679e7091bff9 100644 --- a/test/legacy_test/test_activation_op.py +++ b/test/legacy_test/test_activation_op.py @@ -2593,18 +2593,23 @@ def setUp(self): self.python_api = paddle.round self.init_dtype() self.init_shape() + self.init_decimals() np.random.seed(1024) - x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) - out = np.round(x) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) * 100 + out = np.round(x, decimals=self.decimals) self.inputs = {'X': OpTest.np_dtype_to_base_dtype(x)} self.outputs = {'Out': out} + self.attrs = {'decimals': self.decimals} self.convert_input_output() def init_shape(self): self.shape = [10, 12] + def init_decimals(self): + self.decimals = 0 + def test_check_output(self): self.check_output( check_pir=True, check_pir_onednn=self.check_pir_onednn @@ -2619,6 +2624,33 @@ def init_shape(self): self.shape = [] +class TestRound_decimals1(TestRound): + def init_decimals(self): + self.decimals = 2 + + def test_round_api(self): + with dynamic_guard(): + for device in devices: + if device == 'cpu' or ( + device == 'gpu' and paddle.is_compiled_with_cuda() + ): + x_np = ( + np.random.uniform(-1, 1, self.shape).astype(self.dtype) + * 100 + ) + out_expect = np.round(x_np, decimals=self.decimals) + x_paddle = paddle.to_tensor( + x_np, dtype=self.dtype, place=device + ) + y = paddle.round(x_paddle, decimals=self.decimals) + np.testing.assert_allclose(y.numpy(), out_expect, rtol=1e-3) + + +class TestRound_decimals2(TestRound_decimals1): + def init_decimals(self): + self.decimals = -1 + + class TestRelu(TestActivation): def setUp(self): self.op_type = "relu" From 90a67e8473ec6514790466e6830c554b5e074e16 Mon Sep 17 00:00:00 2001 From: ming1753 <61511741+ming1753@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:08:26 +0800 Subject: [PATCH 09/16] [Inference] Refine global search optimization for cuBLASLt and apply it in INT8 GEMM. (#65597) * [Inference] Refine global search optimization for cuBLASLt and apply it in INT8 GEMM --- paddle/common/flags.cc | 12 + .../kernels/funcs/blas/blaslt_gemm_search.h | 701 +++++++++++++++++ .../phi/kernels/funcs/blas/blaslt_impl.cu.h | 153 +++- .../fp8_gemm_with_cublasLt/cublaslt_gemm.h | 710 +----------------- 4 files changed, 871 insertions(+), 705 deletions(-) create mode 100644 paddle/phi/kernels/funcs/blas/blaslt_gemm_search.h diff --git a/paddle/common/flags.cc b/paddle/common/flags.cc index bf119fb1fff119..253c1a266e2ddb 100644 --- a/paddle/common/flags.cc +++ b/paddle/common/flags.cc @@ -1758,6 +1758,18 @@ PHI_DEFINE_EXPORTED_string( "If default, " "dlopen will search mkl from LD_LIBRARY_PATH"); +/** + * Apply global search in blaslt FLAG + * Name: enable_blaslt_global_search + * Since Version: 3.0.0 + * Value Range: bool, default=false + * Example: + * Note: If True, will apply global search in blaslt. + */ +PHI_DEFINE_EXPORTED_bool(enable_blaslt_global_search, + false, + "Whether to use global search in blaslt."); + PHI_DEFINE_EXPORTED_string(op_dir, // NOLINT "", "Specify path for loading user-defined op library."); diff --git a/paddle/phi/kernels/funcs/blas/blaslt_gemm_search.h b/paddle/phi/kernels/funcs/blas/blaslt_gemm_search.h new file mode 100644 index 00000000000000..92166603f6940b --- /dev/null +++ b/paddle/phi/kernels/funcs/blas/blaslt_gemm_search.h @@ -0,0 +1,701 @@ +/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "paddle/phi/backends/dynload/cublasLt.h" +#include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/common/memory_utils.h" +#include "paddle/phi/core/dense_tensor.h" + +#include "paddle/phi/api/include/context_pool.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/allocator.h" + +namespace phi { +namespace funcs { +namespace cublaslt_internal { + +const std::array split_k_candidates = {2, 3, 4, 5, 6, 8, 12, 16, 32}; + +struct CublasLtAlgoSelectorParam { + cublasLtMatmulAlgo_t algo; + int m; + int n; + int k; + int algo_id; + int swizzle; + int custom_option; + int tile; + int split_k_val; + int reduction_scheme; + int stages; + void* workspace; + size_t workspace_size; + float time; +}; + +inline bool compare_algo_time(const CublasLtAlgoSelectorParam& param_a, + const CublasLtAlgoSelectorParam& param_b) { + return (param_a.time < param_b.time); +} + +class CublasLtAlgoCache { + public: + static CublasLtAlgoCache& Instance() { + static CublasLtAlgoCache instance(100); + return instance; + } + + template + void RunAndMeasureAlgo(cublasLtHandle_t handle, + cublasLtMatmulDesc_t matmul_desc, + cublasLtMatrixLayout_t a_desc, + cublasLtMatrixLayout_t b_desc, + cublasLtMatrixLayout_t bias_desc, + cublasLtMatrixLayout_t c_desc, + void* alpha, + void* beta, + const InT* a, + const InT* b, + const OutT* bias, + OutT* c, + CublasLtAlgoSelectorParam& param, // NOLINT + cudaEvent_t& start_event, // NOLINT + cudaEvent_t& stop_event, // NOLINT + cudaStream_t stream) { + cublasStatus_t status; + cublasLtMatmulHeuristicResult_t heuristic_result; + status = dynload::cublasLtMatmulAlgoCheck(handle, + matmul_desc, + a_desc, + b_desc, + bias_desc, + c_desc, + ¶m.algo, + &heuristic_result); + PADDLE_ENFORCE_GPU_SUCCESS(status); + if (status != CUBLAS_STATUS_SUCCESS) { + param.time = std::numeric_limits::max(); + return; + } + + PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(start_event, stream)); + int repeats = search_times_; + + for (int loop = 0; loop < repeats; loop++) { + status = dynload::cublasLtMatmul(handle, + matmul_desc, + alpha, + a, + a_desc, + b, + b_desc, + beta, + bias, + bias_desc, + c, + c_desc, + ¶m.algo, + param.workspace, + param.workspace_size, + stream); + if (status != CUBLAS_STATUS_SUCCESS) { + param.time = std::numeric_limits::max(); + return; + } + } + + PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(stop_event, stream)); + PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamSynchronize(stream)); + + float time; + PADDLE_ENFORCE_GPU_SUCCESS( + cudaEventElapsedTime(&time, start_event, stop_event)); + + param.time = time / repeats; + } + + template + cublasLtMatmulAlgo_t* CublasLtAlgoSelect(cublasLtHandle_t handle, + int m, + int n, + int k, + int batch_count, + const InT* a, + const InT* b, + const OutT* bias, + OutT* c, + void* alpha, + void* beta, + cublasLtMatmulDesc_t matmul_desc, + cublasLtMatrixLayout_t a_desc, + 
cublasLtMatrixLayout_t b_desc, + cublasLtMatrixLayout_t bias_desc, + cublasLtMatrixLayout_t c_desc, + cublasComputeType_t compute_type, + cudaDataType_t scale_type, + cudaDataType_t a_type, + cudaDataType_t b_type, + cudaDataType_t bias_type, + cudaDataType_t c_type, + cudaStream_t stream) { + // If we don't have config file and we donot search, here return nullptr + if (!has_config_file_ && search_times_ <= 0) { + return nullptr; + } + + // VLOG(0) << "m n k: " << m << " " << n << " " << k; + + int64_t seed = 0; + std::hash hash_fn; + + HashMatmulDesc(matmul_desc, &seed, hash_fn); + HashMatrixLayoutDesc(a_desc, &seed, hash_fn); + HashMatrixLayoutDesc(b_desc, &seed, hash_fn); + HashMatrixLayoutDesc(bias_desc, &seed, hash_fn); + HashMatrixLayoutDesc(c_desc, &seed, hash_fn); + + cublasLtMatmulAlgo_t ret; + { + std::lock_guard lock(cache_mutex_); + auto it = map_.find(seed); + if (it != map_.end()) { + VLOG(3) << "CublasLtAlgoSelect Found in cache"; + return &(it->second); + } else { + // if we have cache but not found algo, and we don't want to search, + // here return nullptr + if (search_times_ <= 0) { + return nullptr; + } + } + } + VLOG(3) << "CublasLtAlgoSelect Not Found in cache"; + + // Get Ids + // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoGetIds + cublasStatus_t status = CUBLAS_STATUS_SUCCESS; + // std::vector algo_ids(requested_algo_count_); + int algo_ids[requested_algo_count_]; // NOLINT + + int num_algo_ids; + status = dynload::cublasLtMatmulAlgoGetIds(handle, + compute_type, + scale_type, + a_type, + b_type, + bias_type, + c_type, + requested_algo_count_, + algo_ids, + &num_algo_ids); + PADDLE_ENFORCE_GPU_SUCCESS(status); + + // Traverse all posssible algo combinations + int step = 0; + int limit = 20000; + std::vector params; + + for (int idx = 0; idx < num_algo_ids; idx++) { + cublasLtMatmulAlgo_t algo; + + /* Initialize algo structure with given Algp ID */ + // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoInit + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoInit(handle, + compute_type, + scale_type, + a_type, + b_type, + bias_type, + c_type, + algo_ids[idx], + &algo)); + + // Query the tiles enums supported by that algo which is used to alloc + // enough space to store it + // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoCapGetAttribute + size_t attr_size = 0; + + int batch_support; + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT, + &batch_support, + sizeof(batch_support), + &attr_size)); + if (batch_count > 1 && batch_support == 0) { + continue; + } + + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, CUBLASLT_ALGO_CAP_TILE_IDS, nullptr, 0, &attr_size)); + + int num_tiles = static_cast(attr_size / sizeof(int)); + std::vector tiles(num_tiles == 0 ? 1 : num_tiles); + if (num_tiles == 0) { + tiles[0] = CUBLASLT_MATMUL_TILE_UNDEFINED; + num_tiles = 1; + } else { + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_TILE_IDS, + tiles.data(), + sizeof(int) * num_tiles, + &attr_size)); + } + + // Query the stages enums supported by that algo (cuda must >= 11.0) + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, CUBLASLT_ALGO_CAP_STAGES_IDS, nullptr, 0, &attr_size)); + int num_stages = static_cast(attr_size / sizeof(int)); + std::vector stages(num_stages == 0 ? 
1 : num_stages); + if (num_stages == 0) { + stages[0] = CUBLASLT_MATMUL_STAGES_UNDEFINED; + num_stages = 1; + } else { + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_STAGES_IDS, + stages.data(), + sizeof(int) * num_stages, + &attr_size)); + } + + // Retrieve Other Algo Capabilities attributes + int splitk_support, red_mask, swizzling_max, custom_option_max; + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_SPLITK_SUPPORT, + &splitk_support, + sizeof(splitk_support), + &attr_size)); + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK, + &red_mask, + sizeof(red_mask), + &attr_size)); + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT, + &swizzling_max, + sizeof(swizzling_max), + &attr_size)); + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatmulAlgoCapGetAttribute( + &algo, + CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX, + &custom_option_max, + sizeof(custom_option_max), + &attr_size)); + + /* Loop over the different tiles */ + for (int tile_id = 0; tile_id < num_tiles && step < limit; tile_id++) { + /* Loop over different stages count */ + for (int stage_id = 0; stage_id < num_stages && step < limit; + stage_id++) { + /* Loop over the different custom option if any */ + for (int custom_option = 0; + custom_option <= custom_option_max && step < limit; + custom_option++) { + /* Loop over the CTAs swizzling support */ + for (int k = 0; k <= swizzling_max && step < limit; k++) { + int splir_k_trial = 0; + if (splitk_support) { + splir_k_trial += + sizeof(split_k_candidates) / sizeof(split_k_candidates[0]); + } + + for (int l = 0; (l < (1 + splir_k_trial)) && (step < limit); + l++) { + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_TILE_ID, + &tiles[tile_id], + sizeof(tiles[tile_id]))); + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_STAGES_ID, + &stages[stage_id], + sizeof(stages[stage_id]))); + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION, + &custom_option, + sizeof(custom_option))); + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, + &k, + sizeof(k))); + int split_k_val = 1; + int reduction_scheme = CUBLASLT_REDUCTION_SCHEME_NONE; + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_SPLITK_NUM, + &split_k_val, + sizeof(split_k_val))); + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, + &reduction_scheme, + sizeof(int))); + if (l > 0) { // Split-K case + split_k_val = split_k_candidates[l - 1]; + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_SPLITK_NUM, + &split_k_candidates[l - 1], + sizeof(split_k_candidates[l - 1]))); + for (reduction_scheme = 1; + reduction_scheme < + static_cast(CUBLASLT_REDUCTION_SCHEME_MASK) && + (step < limit); + reduction_scheme = reduction_scheme << 1) { + if (reduction_scheme & red_mask) { + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoConfigSetAttribute( + &algo, + CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, + &reduction_scheme, + 
sizeof(reduction_scheme))); + + cublasLtMatmulHeuristicResult_t heurResult; + status = dynload::cublasLtMatmulAlgoCheck(handle, + matmul_desc, + a_desc, + b_desc, + bias_desc, + c_desc, + &algo, + &heurResult); + if (status == CUBLAS_STATUS_SUCCESS) { + size_t temp_storage_bytes = heurResult.workspaceSize; + auto d_temp_storage = phi::memory_utils::Alloc( + phi::GPUPlace( + phi::backends::gpu::GetCurrentDeviceId()), + temp_storage_bytes); + + CublasLtAlgoSelectorParam algo_select_params; + algo_select_params.algo = algo; + algo_select_params.m = m; + algo_select_params.n = n; + algo_select_params.k = k; + algo_select_params.algo_id = algo_ids[idx]; + algo_select_params.tile = tiles[tile_id]; + algo_select_params.swizzle = k; + algo_select_params.custom_option = custom_option; + algo_select_params.split_k_val = split_k_val; + algo_select_params.reduction_scheme = reduction_scheme; + algo_select_params.stages = stages[stage_id]; + algo_select_params.workspace_size = temp_storage_bytes; + algo_select_params.workspace = d_temp_storage->ptr(); + params.emplace_back(algo_select_params); + step++; + } + } // end if + } + } else { + // Prepare algos + cublasLtMatmulHeuristicResult_t heurResult; + // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoCheck + status = dynload::cublasLtMatmulAlgoCheck(handle, + matmul_desc, + a_desc, + b_desc, + bias_desc, + c_desc, + &algo, + &heurResult); + if (status == CUBLAS_STATUS_SUCCESS) { + size_t temp_storage_bytes = heurResult.workspaceSize; + auto d_temp_storage = phi::memory_utils::Alloc( + phi::GPUPlace(backends::gpu::GetCurrentDeviceId()), + temp_storage_bytes); + CublasLtAlgoSelectorParam algo_select_params; + algo_select_params.algo = algo; + algo_select_params.m = m; + algo_select_params.n = n; + algo_select_params.k = k; + algo_select_params.algo_id = algo_ids[idx]; + algo_select_params.tile = tiles[tile_id]; + algo_select_params.swizzle = k; + algo_select_params.custom_option = custom_option; + algo_select_params.split_k_val = split_k_val; + algo_select_params.reduction_scheme = reduction_scheme; + algo_select_params.stages = stages[stage_id]; + algo_select_params.workspace_size = temp_storage_bytes; + algo_select_params.workspace = d_temp_storage->ptr(); + params.emplace_back(algo_select_params); + step++; + } + } + } + } + } + } + } + } + cudaEvent_t start_event; + cudaEvent_t stop_event; + + PADDLE_ENFORCE_GPU_SUCCESS(cudaEventCreate(&start_event)); + PADDLE_ENFORCE_GPU_SUCCESS(cudaEventCreate(&stop_event)); + + if (step == 0) { + VLOG(3) << "No algo can be used"; + return nullptr; + } + + VLOG(3) << "CublasLtAlgoSelect Start testRun " << step << " " + << params.size(); + + for (int i = 0; i < step; i++) { + RunAndMeasureAlgo(handle, + matmul_desc, + a_desc, + b_desc, + bias_desc, + c_desc, + alpha, + beta, + a, + b, + bias, + c, + params[i], + start_event, + stop_event, + stream); + } + std::sort(params.begin(), params.end(), compare_algo_time); + + size_t res_id = 0; + while (params[res_id].time == 0) res_id++; + + if (res_id >= params.size()) { + VLOG(3) << "No algo can be used"; + return nullptr; + } + + VLOG(3) << "algo selected"; + + ret = params[res_id].algo; + std::lock_guard lock(cache_mutex_); + auto& algo_in_map = map_[seed]; + algo_in_map = ret; + return &algo_in_map; + } + + // Serialize map_ to cache file + void serialize_algo_cache_file() { + if (search_times_ > 0) { + int dev; + cudaGetDevice(&dev); + if (dev == 0) { + std::ofstream outfile; + outfile.open(config_filename_, std::ios::out | std::ios::trunc); + 
outfile << dynload::cublasLtGetCudartVersion() << std::endl; + + for (const auto& p : map_) { + outfile << p.first << " "; + for (size_t i : p.second.data) { + outfile << i << " "; + } + outfile << std::endl; + } + outfile.close(); + } + } + } + ~CublasLtAlgoCache() { serialize_algo_cache_file(); } + + private: + explicit CublasLtAlgoCache(int search_times) + : search_times_(search_times), has_config_file_(true) { + // Init map_ from cache file + std::ifstream infile; + infile.open(config_filename_); + if (!infile.is_open()) { + has_config_file_ = false; + VLOG(3) << "No CublasLtAlgoCache file found"; + return; + } + size_t cublaslt_version, real_cublaslt_version; + int64_t seed = 0; + std::array algo_data; + infile >> cublaslt_version; + VLOG(1) << "cublaslt_version " << cublaslt_version; + + if (dynload::cublasLtGetCudartVersion() != cublaslt_version) { + LOG(INFO) << config_filename_ + << " is not compatible with current cublaslt_version " + << real_cublaslt_version; + return; + } + + while (!infile.eof()) { + infile >> seed >> algo_data[0] >> algo_data[1] >> algo_data[2] >> + algo_data[3] >> algo_data[4] >> algo_data[5] >> algo_data[6] >> + algo_data[7]; + + for (int i = 0; i < 8; ++i) { + map_[seed].data[i] = algo_data[i]; + } + } + infile.close(); + } + + std::string config_filename_{"./paddle_cublaslt_cache"}; + std::unordered_map map_; + int search_times_; + static constexpr int requested_algo_count_ = 100; + std::mutex cache_mutex_; + bool has_config_file_; + + inline int64_t RoundToNextHighPowOfTwo(int64_t n, int64_t min_val) { + n--; + n |= (n >> 1); + n |= (n >> 2); + n |= (n >> 4); + n |= (n >> 8); + n |= (n >> 16); + return std::max(min_val, (n + 1)); + } + + void HashMatmulDesc(cublasLtMatmulDesc_t desc, + int64_t* seed, + const std::hash& hash_fn) { + size_t size_to_write; + int trans_a, trans_b; + uint32_t epilogue; + // int8_t fast_accum; + + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulDescGetAttribute(desc, + CUBLASLT_MATMUL_DESC_TRANSA, + &trans_a, + sizeof(trans_a), + &size_to_write)); + HashValue(seed, hash_fn, static_cast(trans_a)); + + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulDescGetAttribute(desc, + CUBLASLT_MATMUL_DESC_TRANSB, + &trans_b, + sizeof(trans_b), + &size_to_write)); + HashValue(seed, hash_fn, static_cast(trans_b)); + + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulDescGetAttribute(desc, + CUBLASLT_MATMUL_DESC_EPILOGUE, + &epilogue, + sizeof(epilogue), + &size_to_write)); + HashValue(seed, hash_fn, static_cast(epilogue)); + + // PADDLE_ENFORCE_GPU_SUCCESS( + // dyl::cublasLtMatmulDescGetAttribute(desc, + // CUBLASLT_MATMUL_DESC_FAST_ACCUM, + // &fast_accum, + // sizeof(fast_accum), + // &size_to_write)); + // HashValue(seed, hash_fn, static_cast(fast_accum)); + } + + void HashMatrixLayoutDesc(cublasLtMatrixLayout_t desc, + int64_t* seed, + const std::hash& hash_fn) { + size_t size_to_write; + uint32_t dtype; + int32_t batch; + uint64_t row, col; + int64_t ld, batch_offset; + + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatrixLayoutGetAttribute(desc, + CUBLASLT_MATRIX_LAYOUT_TYPE, + &dtype, + sizeof(dtype), + &size_to_write)); + HashValue(seed, hash_fn, static_cast(dtype)); + + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT, + &batch, + sizeof(batch), + &size_to_write)); + HashValue(seed, hash_fn, static_cast(batch)); + + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatrixLayoutGetAttribute( + desc, CUBLASLT_MATRIX_LAYOUT_ROWS, &row, sizeof(row), 
&size_to_write)); + HashValue(seed, hash_fn, RoundToNextHighPowOfTwo(row, 32)); + + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatrixLayoutGetAttribute( + desc, CUBLASLT_MATRIX_LAYOUT_COLS, &col, sizeof(col), &size_to_write)); + HashValue(seed, hash_fn, RoundToNextHighPowOfTwo(col, 32)); + + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatrixLayoutGetAttribute( + desc, CUBLASLT_MATRIX_LAYOUT_LD, &ld, sizeof(ld), &size_to_write)); + HashValue(seed, hash_fn, RoundToNextHighPowOfTwo(ld, 32)); + + // PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( + // desc, CUBLASLT_MATRIX_LAYOUT_ROWS, &row, sizeof(row), + // &size_to_write)); + // HashValue(seed, hash_fn, row); + + // PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( + // desc, CUBLASLT_MATRIX_LAYOUT_COLS, &col, sizeof(col), + // &size_to_write)); + // HashValue(seed, hash_fn, col); + + // PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( + // desc, CUBLASLT_MATRIX_LAYOUT_LD, &ld, sizeof(ld), &size_to_write)); + // HashValue(seed, hash_fn, ld); + + PADDLE_ENFORCE_GPU_SUCCESS(dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, + &batch_offset, + sizeof(batch_offset), + &size_to_write)); + HashValue(seed, hash_fn, static_cast(batch_offset)); + } + + void HashValue(int64_t* seed, + const std::hash& hash_fn, + int64_t value) { + *seed ^= hash_fn(value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); + } +}; + +} // namespace cublaslt_internal +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h b/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h index 08d05d50b8bc70..5ffc7767f05847 100644 --- a/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h +++ b/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h @@ -28,8 +28,10 @@ limitations under the License. 
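A standalone, illustrative sketch (not taken from this patch) of the cache-key scheme used by HashMatmulDesc/HashMatrixLayoutDesc above: attribute values are folded into a single int64 seed with a boost-style hash combiner, after rounding matrix extents up to the next power of two (minimum 32) so that nearby shapes map to the same cache entry. The attribute values in main() are made-up examples; only the combiner and rounding rule mirror the header above.

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>

// Same rounding rule as in the header above: next power of two, clamped to >= min_val.
int64_t RoundToNextHighPowOfTwo(int64_t n, int64_t min_val) {
  n--;
  n |= (n >> 1);
  n |= (n >> 2);
  n |= (n >> 4);
  n |= (n >> 8);
  n |= (n >> 16);
  return std::max(min_val, n + 1);
}

// Boost-style hash combiner, same formula as HashValue in the header above.
void HashValue(int64_t* seed, const std::hash<int64_t>& hash_fn, int64_t value) {
  *seed ^= hash_fn(value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
}

int main() {
  std::hash<int64_t> hash_fn;
  int64_t seed = 0;
  // Hypothetical stand-ins for the attributes the real code reads via cuBLASLt
  // (transA, transB, epilogue, then the rounded matrix extents).
  const int64_t values[] = {0, 1, 1,
                            RoundToNextHighPowOfTwo(1000, 32),   // rows -> 1024
                            RoundToNextHighPowOfTwo(500, 32)};   // cols -> 512
  for (int64_t v : values) {
    HashValue(&seed, hash_fn, v);
  }
  std::cout << "cache key seed = " << seed << "\n";
  return 0;
}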
*/ #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/kernels/autotune/gpu_timer.h" #include "paddle/phi/kernels/autotune/switch_autotune.h" +#include "paddle/phi/kernels/funcs/blas/blaslt_gemm_search.h" COMMON_DECLARE_int64(cublaslt_exhaustive_search_times); +COMMON_DECLARE_bool(enable_blaslt_global_search); #endif namespace phi { @@ -197,6 +199,14 @@ struct MatmulDescriptor { cublasLtMatrixLayout_t out_desc{nullptr}; cublasLtMatmulAlgo_t* algo{nullptr}; bool is_cached{false}; + int64_t M_{-1}; + int64_t N_{-1}; + int64_t K_{-1}; + cublasComputeType_t compute_type_; + cudaDataType_t scale_type_; + cudaDataType_t x_type_; + cudaDataType_t y_type_; + cudaDataType_t out_type_; MatmulDescriptor() {} MatmulDescriptor(const MatmulDescriptor& obj) { @@ -276,6 +286,15 @@ struct MatmulDescriptor { SetBatchAndStride(y_desc, batch_size, stride_y); SetBatchAndStride(out_desc, batch_size, stride_out); } + + M_ = M; + N_ = N; + K_ = K; + compute_type_ = compute_type; + scale_type_ = scale_type; + x_type_ = mat_type; + y_type_ = mat_type; + out_type_ = out_mat_type; } cublasLtMatmulAlgo_t* SetAlgo() { @@ -668,27 +687,48 @@ struct CublasLtBase { cublasLtHandle_t cublaslt_handle = ctx.cublaslt_handle(); size_t workspace_size = static_cast(4) * 1024 * 1024; - phi::Allocator::AllocationPtr workspace = GetWorkspace(ctx, workspace_size); - - if (planner != nullptr) { - if (phi::autotune::AutoTuneStatus::Instance().UseAutoTune() && - (!desc->is_cached)) { - SearchBestAlgo(ctx, - cublaslt_handle, - desc, - static_cast(&alpha), - static_cast(&beta), - y_ptr, - x_ptr, - out_ptr, - workspace->ptr(), - workspace_size); - MatmulDescriptor* best_desc = new MatmulDescriptor(*desc); - VLOG(6) << best_desc->GetDescResultString( - "[Searched CublasltDescriptor] "); - - auto& cache = phi::autotune::AutoTuneCache::Instance().GetMatmul(); - cache.SetSubKey(sub_key, reinterpret_cast(best_desc)); + phi::Allocator::AllocationPtr workspace = nullptr; + + if (FLAGS_enable_blaslt_global_search && planner != nullptr && + !desc->is_cached) { + SearchBestAlgoGlobal(ctx, + cublaslt_handle, + desc, + static_cast(&alpha), + static_cast(&beta), + y_ptr, + x_ptr, + out_ptr, + workspace, + workspace_size); + MatmulDescriptor* best_desc = new MatmulDescriptor(*desc); + VLOG(6) << best_desc->GetDescResultString( + "[Searched CublasltDescriptor] "); + + auto& cache = phi::autotune::AutoTuneCache::Instance().GetMatmul(); + cache.SetSubKey(sub_key, reinterpret_cast(best_desc)); + } else { + workspace = GetWorkspace(ctx, workspace_size); + if (planner != nullptr) { + if (phi::autotune::AutoTuneStatus::Instance().UseAutoTune() && + (!desc->is_cached)) { + SearchBestAlgo(ctx, + cublaslt_handle, + desc, + static_cast(&alpha), + static_cast(&beta), + y_ptr, + x_ptr, + out_ptr, + workspace->ptr(), + workspace_size); + MatmulDescriptor* best_desc = new MatmulDescriptor(*desc); + VLOG(6) << best_desc->GetDescResultString( + "[Searched CublasltDescriptor] "); + + auto& cache = phi::autotune::AutoTuneCache::Instance().GetMatmul(); + cache.SetSubKey(sub_key, reinterpret_cast(best_desc)); + } } } @@ -712,6 +752,77 @@ struct CublasLtBase { ctx.stream())); } + static void SearchBestAlgoGlobal( + const phi::GPUContext& ctx, + const cublasLtHandle_t& lt_handle, + MatmulDescriptor* desc, + const void* alpha, + const void* beta, + const void* y_data, + const void* x_data, + void* out_data, + phi::Allocator::AllocationPtr& workspace, // NOLINT + size_t& workspace_size) { // NOLINT + void* bias_ptr = nullptr; + cublasLtMatmulAlgo_t* algo = + 
cublaslt_internal::CublasLtAlgoCache::Instance().CublasLtAlgoSelect( + lt_handle, + desc->M_, + desc->N_, + desc->K_, + 1, + y_data, + x_data, + bias_ptr, + out_data, + const_cast(alpha), + const_cast(beta), + desc->op_desc, + desc->y_desc, + desc->x_desc, + desc->out_desc, + desc->out_desc, + desc->compute_type_, + desc->scale_type_, + desc->y_type_, + desc->x_type_, + desc->out_type_, + desc->out_type_, + ctx.stream()); + if (algo == nullptr) { + LOG(WARNING) << "CublasLtAlgoSelect failed, result is empty! We attempt " + "to use Heuristic search."; + workspace_size = static_cast(64) * 1024 * 1024; + workspace = GetWorkspace(ctx, workspace_size); + SearchBestAlgo(ctx, + lt_handle, + desc, + static_cast(&alpha), + static_cast(&beta), + y_data, + x_data, + out_data, + workspace->ptr(), + workspace_size); + } else { + cublasLtMatmulHeuristicResult_t heurResult; + PADDLE_ENFORCE_GPU_SUCCESS( + dynload::cublasLtMatmulAlgoCheck(ctx.cublaslt_handle(), + desc->op_desc, + desc->y_desc, + desc->x_desc, + desc->out_desc, + desc->out_desc, + algo, + &heurResult)); + cublasLtMatmulAlgo_t* best_algo = desc->SetAlgo(); + *best_algo = *algo; + workspace_size = heurResult.workspaceSize; + workspace = phi::memory_utils::Alloc( + phi::GPUPlace(backends::gpu::GetCurrentDeviceId()), workspace_size); + } + } + static void SearchBestAlgo(const phi::GPUContext& ctx, const cublasLtHandle_t& lt_handle, MatmulDescriptor* desc, diff --git a/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/cublaslt_gemm.h b/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/cublaslt_gemm.h index e661a6af7d0e75..c679c4d02f57eb 100644 --- a/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/cublaslt_gemm.h +++ b/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/cublaslt_gemm.h @@ -31,6 +31,7 @@ limitations under the License. 
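A rough, self-contained sketch (placeholder types only, not the real cuBLASLt/Paddle API) of the control flow SearchBestAlgoGlobal above implements: try the exhaustive, cache-backed selection first; if it yields nothing, fall back to the heuristic search with a larger 64 MB workspace; otherwise keep the searched algorithm and size the workspace from the heuristic-check result.

#include <cstddef>
#include <iostream>
#include <optional>

struct Algo {
  int id = -1;
  std::size_t workspace_bytes = 0;
};

// Stand-in for CublasLtAlgoCache::Instance().CublasLtAlgoSelect(...): empty
// when neither the cache nor an exhaustive search produces an algorithm.
std::optional<Algo> ExhaustiveSelect(bool have_result) {
  if (!have_result) return std::nullopt;
  return Algo{7, std::size_t{32} << 20};  // algo id + workspace from AlgoCheck
}

// Stand-in for the heuristic SearchBestAlgo fallback.
Algo HeuristicSelect(std::size_t workspace_bytes) {
  return Algo{0, workspace_bytes};
}

Algo SelectAlgoGlobal(bool exhaustive_found) {
  if (auto algo = ExhaustiveSelect(exhaustive_found)) {
    return *algo;  // use the searched algo and the workspace it was checked with
  }
  // "CublasLtAlgoSelect failed, result is empty": retry with heuristic search
  // and a 64 MB workspace, mirroring the fallback branch above.
  return HeuristicSelect(std::size_t{64} << 20);
}

int main() {
  const Algo algo = SelectAlgoGlobal(/*exhaustive_found=*/false);
  std::cout << "algo id " << algo.id << ", workspace " << algo.workspace_bytes
            << " bytes\n";
  return 0;
}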
*/ #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" +#include "paddle/phi/kernels/funcs/blas/blaslt_gemm_search.h" namespace dyl = phi::dynload; @@ -48,666 +49,6 @@ namespace cutlass_internal { "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " \ "information")) -const int split_k_candidates[] = {2, 3, 4, 5, 6, 8, 12, 16, 32}; - -struct CublasLtAlgoSelectorParam { - cublasLtMatmulAlgo_t algo; - int m; - int n; - int k; - int algo_id; - int swizzle; - int custom_option; - int tile; - int split_k_val; - int reduction_scheme; - int stages; - void* workspace; - size_t workspace_size; - float time; -}; - -inline bool compare_algo_time(const CublasLtAlgoSelectorParam& param_a, - const CublasLtAlgoSelectorParam& param_b) { - return (param_a.time < param_b.time); -} - -class CublasLtAlgoCache { - public: - static CublasLtAlgoCache& Instance() { - static CublasLtAlgoCache instance(100); - return instance; - } - - template - void TestMatmulRun(cublasLtHandle_t handle, - cublasLtMatmulDesc_t matmul_desc, - cublasLtMatrixLayout_t a_desc, - cublasLtMatrixLayout_t b_desc, - cublasLtMatrixLayout_t bias_desc, - cublasLtMatrixLayout_t c_desc, - void* alpha, - void* beta, - const InT* a, - const InT* b, - const OutT* bias, - OutT* c, - CublasLtAlgoSelectorParam& param, // NOLINT - cudaEvent_t& start_event, // NOLINT - cudaEvent_t& stop_event, // NOLINT - cudaStream_t stream) { - cublasStatus_t status; - cublasLtMatmulHeuristicResult_t heuristic_result; - status = dyl::cublasLtMatmulAlgoCheck(handle, - matmul_desc, - a_desc, - b_desc, - bias_desc, - c_desc, - ¶m.algo, - &heuristic_result); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCheck); - if (status != CUBLAS_STATUS_SUCCESS) { - param.time = std::numeric_limits::max(); - return; - } - - PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(start_event, stream)); - int repeats = search_times_; - - for (int loop = 0; loop < repeats; loop++) { - status = dyl::cublasLtMatmul(handle, - matmul_desc, - alpha, - a, - a_desc, - b, - b_desc, - beta, - bias, - bias_desc, - c, - c_desc, - ¶m.algo, - param.workspace, - param.workspace_size, - stream); - if (status != CUBLAS_STATUS_SUCCESS) { - param.time = std::numeric_limits::max(); - return; - } - } - - PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(stop_event, stream)); - PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamSynchronize(stream)); - - float time; - PADDLE_ENFORCE_GPU_SUCCESS( - cudaEventElapsedTime(&time, start_event, stop_event)); - - param.time = time / repeats; - } - - template - cublasLtMatmulAlgo_t* CublasLtAlgoSelect(cublasLtHandle_t handle, - int m, - int n, - int k, - int batch_count, - const InT* a, - const InT* b, - const OutT* bias, - OutT* c, - void* alpha, - void* beta, - cublasLtMatmulDesc_t matmul_desc, - cublasLtMatrixLayout_t a_desc, - cublasLtMatrixLayout_t b_desc, - cublasLtMatrixLayout_t bias_desc, - cublasLtMatrixLayout_t c_desc, - cublasComputeType_t compute_type, - cudaDataType_t scale_type, - cudaDataType_t a_type, - cudaDataType_t b_type, - cudaDataType_t bias_type, - cudaDataType_t c_type, - cudaStream_t stream) { - // If we don't have config file and we donot search, here return nullptr - if (!has_config_file_ && search_times_ <= 0) { - return nullptr; - } - - // VLOG(0) << "m n k" << m << " " << n << " " << k; - - int64_t seed = 0; - std::hash hash_fn; - - HashMatmulDesc(matmul_desc, &seed, hash_fn); - HashMatrixLayoutDesc(a_desc, &seed, hash_fn); - HashMatrixLayoutDesc(b_desc, &seed, hash_fn); - 
HashMatrixLayoutDesc(bias_desc, &seed, hash_fn); - HashMatrixLayoutDesc(c_desc, &seed, hash_fn); - - cublasLtMatmulAlgo_t ret; - { - std::lock_guard lock(cache_mutex_); - auto it = map_.find(seed); - if (it != map_.end()) { - VLOG(3) << "CublasLtAlgoSelect Found in cache"; - return &(it->second); - } else { - // if we have cache but not found algo, and we don't want to search, - // here return nullptr - if (search_times_ <= 0) { - return nullptr; - } - } - } - VLOG(3) << "CublasLtAlgoSelect Not Found in cache"; - - // Get Ids - // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoGetIds - cublasStatus_t status = CUBLAS_STATUS_SUCCESS; - // std::vector algo_ids(requested_algo_count_); - int algo_ids[requested_algo_count_]; // NOLINT - - int num_algo_ids; - status = dyl::cublasLtMatmulAlgoGetIds(handle, - compute_type, - scale_type, - a_type, - b_type, - bias_type, - c_type, - requested_algo_count_, - algo_ids, - &num_algo_ids); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoGetIds); - - // Traverse all posssible algo combinations - int step = 0; - int limit = 20000; - std::vector params; - - for (int idx = 0; idx < num_algo_ids; idx++) { - cublasLtMatmulAlgo_t algo; - - /* Initialize algo structure with given Algp ID */ - // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoInit - status = dyl::cublasLtMatmulAlgoInit(handle, - compute_type, - scale_type, - a_type, - b_type, - bias_type, - c_type, - algo_ids[idx], - &algo); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoInit); - - // Query the tiles enums supported by that algo which is used to alloc - // enough space to store it - // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoCapGetAttribute - size_t attr_size = 0; - - int batch_support; - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, - CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT, - &batch_support, - sizeof(batch_support), - &attr_size); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCapGetAttribute); - if (batch_count > 1 && batch_support == 0) { - continue; - } - - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, CUBLASLT_ALGO_CAP_TILE_IDS, nullptr, 0, &attr_size); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCapGetAttribute); - - int num_tiles = static_cast(attr_size / sizeof(int)); - std::vector tiles(num_tiles == 0 ? 1 : num_tiles); - if (num_tiles == 0) { - tiles[0] = CUBLASLT_MATMUL_TILE_UNDEFINED; - num_tiles = 1; - } else { - status = - dyl::cublasLtMatmulAlgoCapGetAttribute(&algo, - CUBLASLT_ALGO_CAP_TILE_IDS, - tiles.data(), - sizeof(int) * num_tiles, - &attr_size); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCapGetAttribute); - } - - // Query the stages enums supported by that algo (cuda must >= 11.0) - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, CUBLASLT_ALGO_CAP_STAGES_IDS, nullptr, 0, &attr_size); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCapGetAttribute); - int num_stages = static_cast(attr_size / sizeof(int)); - std::vector stages(num_stages == 0 ? 
1 : num_stages); - if (num_stages == 0) { - stages[0] = CUBLASLT_MATMUL_STAGES_UNDEFINED; - num_stages = 1; - } else { - status = - dyl::cublasLtMatmulAlgoCapGetAttribute(&algo, - CUBLASLT_ALGO_CAP_STAGES_IDS, - stages.data(), - sizeof(int) * num_stages, - &attr_size); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCapGetAttribute); - } - - // Retrieve Other Algo Capabilities attributes - int splitk_support, red_mask, swizzling_max, custom_option_max; - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, - CUBLASLT_ALGO_CAP_SPLITK_SUPPORT, - &splitk_support, - sizeof(splitk_support), - &attr_size); - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, - CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK, - &red_mask, - sizeof(red_mask), - &attr_size); - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, - CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT, - &swizzling_max, - sizeof(swizzling_max), - &attr_size); - status = dyl::cublasLtMatmulAlgoCapGetAttribute( - &algo, - CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX, - &custom_option_max, - sizeof(custom_option_max), - &attr_size); - PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulAlgoCapGetAttribute); - - /* Loop over the different tiles */ - for (int tile_id = 0; tile_id < num_tiles && step < limit; tile_id++) { - /* Loop over different stages count */ - for (int stage_id = 0; stage_id < num_stages && step < limit; - stage_id++) { - /* Loop over the different custom option if any */ - for (int custom_option = 0; - custom_option <= custom_option_max && step < limit; - custom_option++) { - /* Loop over the CTAs swizzling support */ - for (int k = 0; k <= swizzling_max && step < limit; k++) { - int splir_k_trial = 0; - if (splitk_support) { - splir_k_trial += - sizeof(split_k_candidates) / sizeof(split_k_candidates[0]); - } - - for (int l = 0; (l < (1 + splir_k_trial)) && (step < limit); - l++) { - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_TILE_ID, - &tiles[tile_id], - sizeof(tiles[tile_id])); - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_STAGES_ID, - &stages[stage_id], - sizeof(stages[stage_id])); - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION, - &custom_option, - sizeof(custom_option)); - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, &k, sizeof(k)); - int split_k_val = 1; - int reduction_scheme = CUBLASLT_REDUCTION_SCHEME_NONE; - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_SPLITK_NUM, - &split_k_val, - sizeof(split_k_val)); - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, - &reduction_scheme, - sizeof(int)); - if (l > 0) { // Split-K case - split_k_val = split_k_candidates[l - 1]; - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_SPLITK_NUM, - &split_k_candidates[l - 1], - sizeof(split_k_candidates[l - 1])); - for (reduction_scheme = 1; - reduction_scheme < - static_cast(CUBLASLT_REDUCTION_SCHEME_MASK) && - (step < limit); - reduction_scheme = reduction_scheme << 1) { - if (reduction_scheme & red_mask) { - status = dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, - CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, - &reduction_scheme, - sizeof(reduction_scheme)); - PADDLE_CUBLASLT_STATUS_CHECK( - cublasLtMatmulAlgoConfigSetAttribute); - - cublasLtMatmulHeuristicResult_t heurResult; - status = dyl::cublasLtMatmulAlgoCheck(handle, - matmul_desc, - 
a_desc, - b_desc, - bias_desc, - c_desc, - &algo, - &heurResult); - if (status == CUBLAS_STATUS_SUCCESS) { - size_t temp_storage_bytes = heurResult.workspaceSize; - auto d_temp_storage = phi::memory_utils::Alloc( - phi::GPUPlace( - phi::backends::gpu::GetCurrentDeviceId()), - temp_storage_bytes); - - CublasLtAlgoSelectorParam algo_select_params; - algo_select_params.algo = algo; - algo_select_params.m = m; - algo_select_params.n = n; - algo_select_params.k = k; - algo_select_params.algo_id = algo_ids[idx]; - algo_select_params.tile = tiles[tile_id]; - algo_select_params.swizzle = k; - algo_select_params.custom_option = custom_option; - algo_select_params.split_k_val = split_k_val; - algo_select_params.reduction_scheme = reduction_scheme; - algo_select_params.stages = stages[stage_id]; - algo_select_params.workspace_size = temp_storage_bytes; - algo_select_params.workspace = d_temp_storage->ptr(); - params.emplace_back(algo_select_params); - step++; - } - } // end if - } - } else { - // Prepare algos - cublasLtMatmulHeuristicResult_t heurResult; - // https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulAlgoCheck - status = dyl::cublasLtMatmulAlgoCheck(handle, - matmul_desc, - a_desc, - b_desc, - bias_desc, - c_desc, - &algo, - &heurResult); - if (status == CUBLAS_STATUS_SUCCESS) { - size_t temp_storage_bytes = heurResult.workspaceSize; - auto d_temp_storage = phi::memory_utils::Alloc( - phi::GPUPlace(backends::gpu::GetCurrentDeviceId()), - temp_storage_bytes); - CublasLtAlgoSelectorParam algo_select_params; - algo_select_params.algo = algo; - algo_select_params.m = m; - algo_select_params.n = n; - algo_select_params.k = k; - algo_select_params.algo_id = algo_ids[idx]; - algo_select_params.tile = tiles[tile_id]; - algo_select_params.swizzle = k; - algo_select_params.custom_option = custom_option; - algo_select_params.split_k_val = split_k_val; - algo_select_params.reduction_scheme = reduction_scheme; - algo_select_params.stages = stages[stage_id]; - algo_select_params.workspace_size = temp_storage_bytes; - algo_select_params.workspace = d_temp_storage->ptr(); - params.emplace_back(algo_select_params); - step++; - } - } - } - } - } - } - } - } - cudaEvent_t start_event; - cudaEvent_t stop_event; - - PADDLE_ENFORCE_GPU_SUCCESS(cudaEventCreate(&start_event)); - PADDLE_ENFORCE_GPU_SUCCESS(cudaEventCreate(&stop_event)); - - if (step == 0) { - VLOG(3) << "No algo can be used"; - return nullptr; - } - - VLOG(3) << "CublasLtAlgoSelect Start testRun " << step << " " - << params.size(); - - for (int i = 0; i < step; i++) { - TestMatmulRun(handle, - matmul_desc, - a_desc, - b_desc, - bias_desc, - c_desc, - alpha, - beta, - a, - b, - bias, - c, - params[i], - start_event, - stop_event, - stream); - } - std::sort(params.begin(), params.end(), compare_algo_time); - - int res_id = 0; - while (params[res_id].time == 0) res_id++; - - if (res_id >= params.size()) { - VLOG(3) << "No algo can be used"; - return nullptr; - } - - VLOG(3) << "algo selected"; - - ret = params[res_id].algo; - std::lock_guard lock(cache_mutex_); - auto& algo_in_map = map_[seed]; - algo_in_map = ret; - return &algo_in_map; - } - - // Serialize map_ to cache file - void serialize_algo_cache_file() { - if (search_times_ > 0) { - int dev; - cudaGetDevice(&dev); - if (dev == 0) { - std::ofstream outfile; - outfile.open(config_filename_, std::ios::out | std::ios::trunc); - outfile << dyl::cublasLtGetCudartVersion() << std::endl; - - for (const auto& p : map_) { - outfile << p.first << " "; - for (int i = 0; i < 8; ++i) { - outfile 
<< p.second.data[i] << " "; - } - outfile << std::endl; - } - outfile.close(); - } - } - } - ~CublasLtAlgoCache() { serialize_algo_cache_file(); } - - private: - explicit CublasLtAlgoCache(int search_times) - : search_times_(search_times), has_config_file_(true) { - // Init map_ from cache file - std::ifstream infile; - infile.open(config_filename_); - if (!infile.is_open()) { - has_config_file_ = false; - VLOG(3) << "No CublasLtAlgoCache file found"; - return; - } - size_t cublaslt_version, real_cublaslt_version; - int64_t seed = 0; - uint64_t algo_data[8]; - infile >> cublaslt_version; - VLOG(1) << "cublaslt_version " << cublaslt_version; - - if (dyl::cublasLtGetCudartVersion() != cublaslt_version) { - LOG(INFO) << config_filename_ - << " is not compatible with current cublaslt_version " - << real_cublaslt_version; - return; - } - - while (!infile.eof()) { - infile >> seed >> algo_data[0] >> algo_data[1] >> algo_data[2] >> - algo_data[3] >> algo_data[4] >> algo_data[5] >> algo_data[6] >> - algo_data[7]; - - for (int i = 0; i < 8; ++i) { - map_[seed].data[i] = algo_data[i]; - } - } - infile.close(); - } - - std::string config_filename_{"./paddle_cublaslt_cache"}; - std::unordered_map map_; - int search_times_; - const int requested_algo_count_ = 100; - std::mutex cache_mutex_; - bool has_config_file_; - - inline int64_t RoundToNextHighPowOfTwo(int64_t n, int64_t min_val) { - n--; - n |= (n >> 1); - n |= (n >> 2); - n |= (n >> 4); - n |= (n >> 8); - n |= (n >> 16); - return std::max(min_val, (n + 1)); - } - - void HashMatmulDesc(cublasLtMatmulDesc_t desc, - int64_t* seed, - const std::hash& hash_fn) { - size_t size_to_write; - int trans_a, trans_b; - uint32_t epilogue; - // int8_t fast_accum; - - PADDLE_ENFORCE_GPU_SUCCESS( - dyl::cublasLtMatmulDescGetAttribute(desc, - CUBLASLT_MATMUL_DESC_TRANSA, - &trans_a, - sizeof(trans_a), - &size_to_write)); - HashValue(seed, hash_fn, static_cast(trans_a)); - - PADDLE_ENFORCE_GPU_SUCCESS( - dyl::cublasLtMatmulDescGetAttribute(desc, - CUBLASLT_MATMUL_DESC_TRANSB, - &trans_b, - sizeof(trans_b), - &size_to_write)); - HashValue(seed, hash_fn, static_cast(trans_b)); - - PADDLE_ENFORCE_GPU_SUCCESS( - dyl::cublasLtMatmulDescGetAttribute(desc, - CUBLASLT_MATMUL_DESC_EPILOGUE, - &epilogue, - sizeof(epilogue), - &size_to_write)); - HashValue(seed, hash_fn, static_cast(epilogue)); - - // PADDLE_ENFORCE_GPU_SUCCESS( - // dyl::cublasLtMatmulDescGetAttribute(desc, - // CUBLASLT_MATMUL_DESC_FAST_ACCUM, - // &fast_accum, - // sizeof(fast_accum), - // &size_to_write)); - // HashValue(seed, hash_fn, static_cast(fast_accum)); - } - - void HashMatrixLayoutDesc(cublasLtMatrixLayout_t desc, - int64_t* seed, - const std::hash& hash_fn) { - size_t size_to_write; - uint32_t dtype; - int32_t batch; - uint64_t row, col; - int64_t ld, batch_offset; - - PADDLE_ENFORCE_GPU_SUCCESS( - dyl::cublasLtMatrixLayoutGetAttribute(desc, - CUBLASLT_MATRIX_LAYOUT_TYPE, - &dtype, - sizeof(dtype), - &size_to_write)); - HashValue(seed, hash_fn, static_cast(dtype)); - - PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - desc, - CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT, - &batch, - sizeof(batch), - &size_to_write)); - HashValue(seed, hash_fn, static_cast(batch)); - - PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - desc, CUBLASLT_MATRIX_LAYOUT_ROWS, &row, sizeof(row), &size_to_write)); - HashValue(seed, hash_fn, RoundToNextHighPowOfTwo(row, 32)); - - PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - desc, CUBLASLT_MATRIX_LAYOUT_COLS, &col, 
sizeof(col), &size_to_write)); - HashValue(seed, hash_fn, RoundToNextHighPowOfTwo(col, 32)); - - PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - desc, CUBLASLT_MATRIX_LAYOUT_LD, &ld, sizeof(ld), &size_to_write)); - HashValue(seed, hash_fn, RoundToNextHighPowOfTwo(ld, 32)); - - // PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - // desc, CUBLASLT_MATRIX_LAYOUT_ROWS, &row, sizeof(row), - // &size_to_write)); - // HashValue(seed, hash_fn, row); - - // PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - // desc, CUBLASLT_MATRIX_LAYOUT_COLS, &col, sizeof(col), - // &size_to_write)); - // HashValue(seed, hash_fn, col); - - // PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - // desc, CUBLASLT_MATRIX_LAYOUT_LD, &ld, sizeof(ld), &size_to_write)); - // HashValue(seed, hash_fn, ld); - - PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutGetAttribute( - desc, - CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, - &batch_offset, - sizeof(batch_offset), - &size_to_write)); - HashValue(seed, hash_fn, static_cast(batch_offset)); - } - - void HashValue(int64_t* seed, - const std::hash& hash_fn, - int64_t value) { - *seed ^= hash_fn(value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); - } -}; - template inline cudaDataType_t GetCublasLtDataType() { return CUDA_R_32F; @@ -857,30 +198,31 @@ void CublasLtMatmulFP8(const phi::GPUContext& dev_ctx, PADDLE_CUBLASLT_STATUS_CHECK(cublasLtMatmulDescSetAttribute); } - cublasLtMatmulAlgo_t* algo = CublasLtAlgoCache::Instance().CublasLtAlgoSelect( - dev_ctx.cublaslt_handle(), - m, - n, - k, - batch_count, - mat_b.data(), - mat_a.data(), - bias_ptr, - out->data(), - &alpha_, - &beta_, - matmul_desc_, - B_desc_, - A_desc_, - Bias_desc_, - C_desc_, - CUBLAS_COMPUTE_32F, - CUDA_R_32F, - B_type, - A_type, - Bias_type, - C_type, - dev_ctx.stream()); + cublasLtMatmulAlgo_t* algo = + funcs::cublaslt_internal::CublasLtAlgoCache::Instance() + .CublasLtAlgoSelect(dev_ctx.cublaslt_handle(), + m, + n, + k, + batch_count, + mat_b.data(), + mat_a.data(), + bias_ptr, + out->data(), + &alpha_, + &beta_, + matmul_desc_, + B_desc_, + A_desc_, + Bias_desc_, + C_desc_, + CUBLAS_COMPUTE_32F, + CUDA_R_32F, + B_type, + A_type, + Bias_type, + C_type, + dev_ctx.stream()); if (algo == nullptr) { int returnedResults = 0; From 239bf7b94f0b3dd84ecb995732636873555c7a2a Mon Sep 17 00:00:00 2001 From: co63oc Date: Thu, 4 Jul 2024 15:20:08 +0800 Subject: [PATCH 10/16] Clean some tests (#65663) * Fix * Fix * Fix * Fix * ci --- test/deprecated/legacy_test/test_crop_op.py | 160 ----- test/legacy_test/CMakeLists.txt | 15 - test/legacy_test/test_bicubic_interp_op.py | 518 --------------- test/legacy_test/test_bilinear_interp_op.py | 520 --------------- .../legacy_test/test_generate_proposals_op.py | 452 ------------- .../test_generate_proposals_v2_op.py | 168 ++++- test/legacy_test/test_linear_interp_op.py | 381 ----------- test/legacy_test/test_lookup_table_op.py | 437 ------------- test/legacy_test/test_matmul_op.py | 249 ------- test/legacy_test/test_nearest_interp_op.py | 471 -------------- test/legacy_test/test_trilinear_interp_op.py | 613 ------------------ test/mkldnn/test_bilinear_interp_mkldnn_op.py | 204 ------ test/mkldnn/test_matmul_mkldnn_op.py | 260 -------- test/mkldnn/test_nearest_interp_mkldnn_op.py | 203 ------ test/xpu/CMakeLists.txt | 1 - test/xpu/test_bilinear_interp_op_xpu.py | 508 --------------- test/xpu/test_matmul_op_xpu.py | 387 ----------- test/xpu/test_nearest_interp_op_xpu.py | 441 ------------- 18 files changed, 
167 insertions(+), 5821 deletions(-) delete mode 100644 test/deprecated/legacy_test/test_crop_op.py delete mode 100644 test/legacy_test/test_bicubic_interp_op.py delete mode 100755 test/legacy_test/test_bilinear_interp_op.py delete mode 100644 test/legacy_test/test_generate_proposals_op.py delete mode 100755 test/legacy_test/test_linear_interp_op.py delete mode 100644 test/legacy_test/test_lookup_table_op.py delete mode 100644 test/legacy_test/test_matmul_op.py delete mode 100755 test/legacy_test/test_nearest_interp_op.py delete mode 100755 test/legacy_test/test_trilinear_interp_op.py delete mode 100644 test/mkldnn/test_bilinear_interp_mkldnn_op.py delete mode 100644 test/mkldnn/test_matmul_mkldnn_op.py delete mode 100644 test/mkldnn/test_nearest_interp_mkldnn_op.py delete mode 100755 test/xpu/test_bilinear_interp_op_xpu.py delete mode 100644 test/xpu/test_matmul_op_xpu.py delete mode 100644 test/xpu/test_nearest_interp_op_xpu.py diff --git a/test/deprecated/legacy_test/test_crop_op.py b/test/deprecated/legacy_test/test_crop_op.py deleted file mode 100644 index 858fd89fc7e998..00000000000000 --- a/test/deprecated/legacy_test/test_crop_op.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from op_test import OpTest - -import paddle - - -def crop(data, offsets, crop_shape): - def indexOf(shape, index): - result = [] - for dim in reversed(shape): - result.append(index % dim) - index = index / dim - return result[::-1] - - result = [] - for i, value in enumerate(data.flatten()): - index = indexOf(data.shape, i) - selected = True - if len(index) == len(offsets): - for j, offset in enumerate(offsets): - selected = ( - selected - and index[j] >= offset - and index[j] < crop_shape[j] + offset - ) - if selected: - result.append(value) - return np.array(result).reshape(crop_shape) - - -class TestCropOp(OpTest): - def setUp(self): - self.op_type = "crop" - self.crop_by_input = False - self.offset_by_input = False - self.attrs = {} - self.initTestCase() - if self.crop_by_input: - self.inputs = { - 'X': np.random.random(self.x_shape).astype("float64"), - 'Y': np.random.random(self.crop_shape).astype("float64"), - } - else: - self.attrs['shape'] = self.crop_shape - self.inputs = { - 'X': np.random.random(self.x_shape).astype("float64"), - } - if self.offset_by_input: - self.inputs['Offsets'] = np.array(self.offsets).astype('int32') - else: - self.attrs['offsets'] = self.offsets - if self.offsets is None: - self.offsets = [0] * len(self.crop_shape) - if self.crop_shape is None: - self.crop_shape = self.x_shape - - self.outputs = { - 'Out': crop(self.inputs['X'], self.offsets, self.crop_shape) - } - - def initTestCase(self): - self.x_shape = (10, 10) - self.crop_shape = (2, 2) - self.offsets = [1, 2] - - def test_check_output(self): - self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X'], 'Out') - - -class TestCase1(TestCropOp): - def initTestCase(self): - self.x_shape = (16, 8, 32) - self.crop_shape = [2, 2, 3] - self.offsets = [1, 5, 3] - - -class TestCase2(TestCropOp): - def initTestCase(self): - self.x_shape = (15, 8) - self.crop_shape = [15, 8] - self.offsets = [0, 0] - - -class TestCase3(TestCropOp): - def initTestCase(self): - self.x_shape = (4, 8, 16) - self.crop_shape = [2, 2, 3] - self.offsets = [1, 5, 3] - self.crop_by_input = True - - -class TestCase4(TestCropOp): - def initTestCase(self): - self.x_shape = (10, 10) - self.crop_shape = [10, 10] - self.offsets = [0, 0] - self.crop_by_input = True - - -class TestCase5(TestCropOp): - def initTestCase(self): - self.x_shape = (3, 4, 10) - self.crop_shape = [2, 2, 3] - self.offsets = [1, 0, 2] - self.offset_by_input = True - - -class TestCase6(TestCropOp): - def initTestCase(self): - self.x_shape = (10, 9, 14) - self.crop_shape = [3, 3, 5] - self.offsets = [3, 5, 4] - self.crop_by_input = True - self.offset_by_input = True - - -class TestCropNoneOffset(unittest.TestCase): - def test_crop_none_offset(self): - x = paddle.static.data(name="input1", shape=[3, 6, 6], dtype="float32") - crop_shape = [2, 2, 2] - crop = paddle.crop(x, crop_shape, None) - self.assertEqual(crop.shape, (2, 2, 2)) - - -class TestCropNoneShape(unittest.TestCase): - def test_crop_none_shape(self): - x = paddle.static.data(name="input1", shape=[3, 6, 6], dtype="float32") - crop = paddle.crop(x) - self.assertEqual(crop.shape, (3, 6, 6)) - - -class TestCropError(unittest.TestCase): - def test_neg_offset_error(self): - with self.assertRaises(ValueError): - x = paddle.static.data(name='input2', shape=[1], dtype="float32") - out = paddle.crop(x, offsets=[-1]) - - -if __name__ == '__main__': - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/CMakeLists.txt 
b/test/legacy_test/CMakeLists.txt index e12f367f355218..475099f3b02e75 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -439,8 +439,6 @@ list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op) list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass) list(REMOVE_ITEM TEST_OPS test_conv3d_transpose_op) list(REMOVE_ITEM TEST_OPS test_layers) -list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) -list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) @@ -493,7 +491,6 @@ set(TEST_OPS_WITH_GC test_scatter_op test_concat_op test_elementwise_add_op - test_lookup_table_op test_elementwise_sub_op test_gather_op test_mean_op @@ -579,11 +576,6 @@ if((WITH_GPU) AND (WITH_CUDNN_FRONTEND)) test_fused_dot_product_attention_op_static) endif() -py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS - ${GC_ENVS}) -py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS - ${GC_ENVS}) - set_tests_properties(test_conv2d_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_norm_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") @@ -1043,7 +1035,6 @@ set(STATIC_BUILD_TESTS test_nce test_layer_norm_op test_eigh_op - test_matmul_op test_matmul_v2_op test_paddle_save_load_binary test_assign_pos_op @@ -1153,8 +1144,6 @@ set_tests_properties(test_radam_op PROPERTIES TIMEOUT 100) set_tests_properties(test_nan_inf PROPERTIES TIMEOUT 120) set_tests_properties(test_linalg_cholesky_inverse PROPERTIES TIMEOUT 100) set_tests_properties(test_sparse_mask_as_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_bicubic_interp_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES TIMEOUT 120) @@ -1173,9 +1162,7 @@ set_tests_properties(test_imperative_star_gan_with_gradient_penalty set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120) set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120) set_tests_properties(test_lstm_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_matmul_op PROPERTIES TIMEOUT 120) set_tests_properties(test_matmul_v2_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_nearest_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_nearest_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_pad3d_op PROPERTIES TIMEOUT 120) set_tests_properties(test_paddle_save_load_binary PROPERTIES TIMEOUT 120) @@ -1190,9 +1177,7 @@ else() endif() set_tests_properties(test_svd_op PROPERTIES TIMEOUT 80) set_tests_properties(test_static_save_load PROPERTIES TIMEOUT 250) -set_tests_properties(test_trilinear_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_matmul_op_static_build PROPERTIES TIMEOUT 120) set_tests_properties(test_matmul_v2_op_static_build PROPERTIES TIMEOUT 120) set_tests_properties(test_paddle_save_load_binary_static_build PROPERTIES TIMEOUT 120) diff --git a/test/legacy_test/test_bicubic_interp_op.py b/test/legacy_test/test_bicubic_interp_op.py deleted file mode 100644 index d9c68bd4c09bf4..00000000000000 --- a/test/legacy_test/test_bicubic_interp_op.py +++ /dev/null @@ -1,518 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - -import paddle -from paddle import base -from paddle.nn.functional import interpolate -from paddle.pir_utils import test_with_pir_api - - -def cubic_1(x, a): - return ((a + 2) * x - (a + 3)) * x * x + 1 - - -def cubic_2(x, a): - return ((a * x - 5 * a) * x + 8 * a) * x - 4 * a - - -def cubic_interp1d(x0, x1, x2, x3, t): - param = [0, 0, 0, 0] - a = -0.75 - x_1 = t - x_2 = 1.0 - t - param[0] = cubic_2(x_1 + 1.0, a) - param[1] = cubic_1(x_1, a) - param[2] = cubic_1(x_2, a) - param[3] = cubic_2(x_2 + 1.0, a) - return x0 * param[0] + x1 * param[1] + x2 * param[2] + x3 * param[3] - - -def value_bound(input, w, h, x, y): - access_x = int(max(min(x, w - 1), 0)) - access_y = int(max(min(y, h - 1), 0)) - return input[:, :, access_y, access_x] - - -def bicubic_interp_np( - input, - out_h, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - data_layout='kNCHW', -): - """trilinear interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - input = np.transpose(input, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - batch_size, channel, in_h, in_w = input.shape - - ratio_h = ratio_w = 0.0 - if out_h > 1: - if align_corners: - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 1.0 * in_h / out_h - - if out_w > 1: - if align_corners: - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((batch_size, channel, out_h, out_w)) - - for k in range(out_h): - if align_corners: - h = ratio_h * k - else: - h = ratio_h * (k + 0.5) - 0.5 - input_y = np.floor(h) - y_t = h - input_y - for l in range(out_w): - if align_corners: - w = ratio_w * l - else: - w = ratio_w * (l + 0.5) - 0.5 - input_x = np.floor(w) - x_t = w - input_x - for i in range(batch_size): - for j in range(channel): - coefficients = [0, 0, 0, 0] - for ii in range(4): - access_x_0 = int(max(min(input_x - 1, in_w - 1), 0)) - access_x_1 = int(max(min(input_x + 0, in_w - 1), 0)) - access_x_2 = int(max(min(input_x + 1, in_w - 1), 0)) - access_x_3 = int(max(min(input_x + 2, in_w - 1), 0)) - access_y = int(max(min(input_y - 1 + ii, in_h - 1), 0)) - - coefficients[ii] = cubic_interp1d( - input[i, j, access_y, access_x_0], - input[i, j, access_y, access_x_1], - input[i, j, access_y, access_x_2], - input[i, j, access_y, access_x_3], - x_t, - ) - out[i, j, k, l] = cubic_interp1d( - coefficients[0], - coefficients[1], - coefficients[2], - coefficients[3], - y_t, - ) - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - return out.astype(input.dtype) - - -class TestBicubicInterpOp(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCHW' - self.init_test_case() - self.op_type = "bicubic_interp" - # NOTE(dev): some AsDispensible input is not used under 
imperative mode. - input_np = np.random.random(self.input_shape).astype("float64") - - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = bicubic_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.data_layout, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - - self.attrs = { - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'data_layout': self.data_layout, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True) - - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [2, 3, 5, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - - -class TestBicubicInterpCase1(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.align_corners = True - - -class TestBicubicInterpCase2(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [3, 3, 9, 6] - self.out_h = 10 - self.out_w = 8 - self.scale = 0.0 - self.align_corners = True - - -class TestBicubicInterpCase3(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.align_corners = False - - -class TestBicubicInterpCase4(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.out_size = np.array([2, 2]).astype("int32") - self.align_corners = True - - -class TestBicubicInterpCase5(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [3, 3, 9, 6] - self.out_h = 11 - self.out_w = 11 - self.scale = 0.0 - self.out_size = np.array([6, 4]).astype("int32") - self.align_corners = False - - -class TestBicubicInterpCase6(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0 - self.out_size = np.array([64, 32]).astype("int32") - self.align_corners = False - - -class TestBicubicInterpSame(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0.0 - self.align_corners = True - - -class TestBicubicInterpDataLayout(TestBicubicInterpOp): - def init_test_case(self): - self.interp_method = 'bicubic' - self.input_shape = [2, 5, 5, 3] - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - self.data_layout = "NHWC" - - -class TestBicubicInterpOpAPI(unittest.TestCase): - def test_case(self): - np.random.seed(200) - x_data = 
np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - prog = base.Program() - startup_prog = base.Program() - place = ( - base.CUDAPlace(0) - if base.core.is_compiled_with_cuda() - else base.CPUPlace() - ) - - with base.program_guard(prog, startup_prog): - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - - dim = paddle.static.data(name="dim", shape=[1], dtype="int32") - shape_tensor = paddle.static.data( - name="shape_tensor", shape=[2], dtype="int32" - ) - actual_size = paddle.static.data( - name="actual_size", shape=[2], dtype="int32" - ) - scale_tensor = paddle.static.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False - ) - out2 = interpolate( - x, size=[12, dim], mode='bicubic', align_corners=False - ) - out3 = interpolate( - x, size=shape_tensor, mode='bicubic', align_corners=False - ) - out4 = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False - ) - out5 = interpolate( - x, - scale_factor=scale_tensor, - mode='bicubic', - align_corners=False, - ) - - exe = base.Executor(place) - exe.run(base.default_startup_program()) - results = exe.run( - base.default_main_program(), - feed={ - "x": x_data, - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = bicubic_interp_np( - x_data, out_h=12, out_w=12, align_corners=False - ) - for res in results: - np.testing.assert_allclose(res, expect_res, rtol=1e-05) - - with base.dygraph.guard(): - x = paddle.to_tensor(x_data) - interp = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False - ) - dy_result = interp.numpy() - expect = bicubic_interp_np( - x_data, out_h=12, out_w=12, align_corners=False - ) - np.testing.assert_allclose(dy_result, expect, rtol=1e-05) - - -class TestBicubicOpError(unittest.TestCase): - @test_with_pir_api - def test_errors(self): - with paddle.static.program_guard( - paddle.static.Program(), paddle.static.Program() - ): - # the input of interpoalte must be Variable. 
- x1 = base.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], base.CPUPlace() - ) - self.assertRaises(TypeError, interpolate, x1) - - def test_mode_type(): - # mode must be "BILINEAR" "TRILINEAR" "NEAREST" "BICUBIC" - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - - out = interpolate( - x, size=[12, 12], mode='UNKONWN', align_corners=False - ) - - def test_input_shape(): - x = paddle.static.data(name="x", shape=[2], dtype="float32") - out = interpolate( - x, size=[12, 12], mode='BICUBIC', align_corners=False - ) - - def test_size_shape(): - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - out = interpolate( - x, size=[12], mode='BICUBIC', align_corners=False - ) - - def test_align_corcers(): - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - interpolate(x, size=[12, 12], mode='BICUBIC', align_corners=3) - - def test_out_shape(): - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - out = interpolate( - x, size=[12], mode='bicubic', align_corners=False - ) - - def test_attr_data_format(): - # for 5-D input, data_format only can be NCDHW or NDHWC - input = paddle.static.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32" - ) - out = interpolate( - input, - size=[4, 8, 4, 5], - mode='trilinear', - data_format='NHWC', - ) - - def test_actual_shape(): - # the actual_shape must be Variable. - x = base.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], base.CPUPlace() - ) - out = interpolate( - x, size=[12, 12], mode='BICUBIC', align_corners=False - ) - - def test_scale_value(): - # the scale must be greater than zero. - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - out = interpolate( - x, - size=None, - mode='BICUBIC', - align_corners=False, - scale_factor=-2.0, - ) - - def test_attr_5D_input(): - # for 5-D input, data_format only can be NCDHW or NDHWC - input = paddle.static.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32" - ) - out = interpolate( - input, - size=[4, 8, 4, 5], - mode='trilinear', - data_format='NDHWC', - ) - - def test_scale_type(): - # the scale must be greater than zero. 
- x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - scale = base.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], base.CPUPlace() - ) - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=scale, - ) - - def test_align_mode(): - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - out = interpolate( - x, - size=None, - mode='nearest', - align_corners=False, - align_mode=2, - scale_factor=1.0, - ) - - def test_outshape_and_scale(): - x = paddle.static.data( - name="x", shape=[2, 3, 6, 6], dtype="float32" - ) - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=None, - ) - - self.assertRaises(ValueError, test_mode_type) - self.assertRaises(ValueError, test_input_shape) - self.assertRaises(ValueError, test_size_shape) - self.assertRaises(TypeError, test_align_corcers) - self.assertRaises(ValueError, test_attr_data_format) - self.assertRaises(TypeError, test_actual_shape) - self.assertRaises(ValueError, test_scale_value) - self.assertRaises(ValueError, test_out_shape) - self.assertRaises(ValueError, test_attr_5D_input) - self.assertRaises(TypeError, test_scale_type) - self.assertRaises(ValueError, test_align_mode) - self.assertRaises(ValueError, test_outshape_and_scale) - - -if __name__ == "__main__": - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/test_bilinear_interp_op.py b/test/legacy_test/test_bilinear_interp_op.py deleted file mode 100755 index 9409762d881c00..00000000000000 --- a/test/legacy_test/test_bilinear_interp_op.py +++ /dev/null @@ -1,520 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from op_test import OpTest - -import paddle -from paddle.base import core - -paddle.enable_static() - - -def bilinear_interp_np( - input, - out_h, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - align_mode=0, - data_layout='NCHW', -): - """bilinear interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - input = np.transpose(input, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - batch_size, channel, in_h, in_w = input.shape - - ratio_h = ratio_w = 0.0 - if out_h > 1: - if align_corners: - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 1.0 * in_h / out_h - if out_w > 1: - if align_corners: - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((batch_size, channel, out_h, out_w)) - - for i in range(out_h): - if align_mode == 0 and not align_corners: - h = int(ratio_h * (i + 0.5) - 0.5) - else: - h = int(ratio_h * i) - - h = max(0, h) - hid = 1 if h < in_h - 1 else 0 - if align_mode == 0 and not align_corners: - idx_src_h = max(ratio_h * (i + 0.5) - 0.5, 0) - h1lambda = idx_src_h - h - else: - h1lambda = ratio_h * i - h - h2lambda = 1.0 - h1lambda - for j in range(out_w): - if align_mode == 0 and not align_corners: - w = int(ratio_w * (j + 0.5) - 0.5) - else: - w = int(ratio_w * j) - w = max(0, w) - wid = 1 if w < in_w - 1 else 0 - if align_mode == 0 and not align_corners: - idx_src_w = max(ratio_w * (j + 0.5) - 0.5, 0) - w1lambda = idx_src_w - w - else: - w1lambda = ratio_w * j - w - w2lambda = 1.0 - w1lambda - - out[:, :, i, j] = h2lambda * ( - w2lambda * input[:, :, h, w] - + w1lambda * input[:, :, h, w + wid] - ) + h1lambda * ( - w2lambda * input[:, :, h + hid, w] - + w1lambda * input[:, :, h + hid, w + wid] - ) - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - - return out.astype(input.dtype) - - -class TestBilinearInterpOp(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCHW' - self.init_test_case() - self.op_type = "bilinear_interp" - # NOTE(dev): some AsDispensible input is not used under imperative mode. - # Skip check_dygraph while found them in Inputs. 
- input_np = np.random.random(self.input_shape).astype("float64") - - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = bilinear_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - self.data_layout, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - - self.attrs = { - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - 'data_layout': self.data_layout, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase1(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase2(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase3(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase4(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.out_size = np.array([2, 2]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase5(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.out_size = np.array([11, 11]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase6(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([65, 33]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpSame(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpActualShape(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 2, 32, 16] - 
self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpDataLayout(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 5, 5, 3] - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - self.align_mode = 1 - self.data_layout = "NHWC" - - -class TestBilinearInterpOpUint8(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "bilinear_interp" - input_np = np.random.randint( - low=0, high=256, size=self.input_shape - ).astype("uint8") - - if self.scale > 0: - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = bilinear_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - - self.attrs = { - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_dygraph=False - ) - - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [1, 3, 9, 6] - self.out_h = 10 - self.out_w = 9 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [4, 1, 7, 8] - self.out_h = 5 - self.out_w = 13 - self.scale = 0.0 - self.out_size = np.array([6, 15]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): - def set_align_mode(self): - self.align_corners = False - self.align_mode = 1 - - -class TestBilinearInterpWithMethod2(TestBilinearInterpOp): - def set_align_mode(self): - self.align_corners = False - self.align_mode = 0 - - -class TestBilinearInterpWithMethod3(TestBilinearInterpOp): - def set_align_mode(self): - self.align_corners = True - self.align_mode = 0 - - -class TestBilinearInterpScale1(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 2.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpScale2(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 1.0 - self.align_corners = True - self.align_mode = 1 - - -class TestBilinearInterpScale3(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 1.5 - self.align_corners = True - self.align_mode = 1 - - 
-class TestBilinearInterpZero(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 0.2 - self.align_corners = False - self.align_mode = 0 - - -class TestBilinearInterpOp_attr_tensor(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "bilinear_interp" - self.shape_by_1Dtensor = False - self.scale_by_1Dtensor = False - self.attrs = { - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - } - - input_np = np.random.random(self.input_shape).astype("float64") - self.inputs = {'X': input_np} - - if self.scale_by_1Dtensor: - self.inputs['Scale'] = np.array([self.scale]).astype("float32") - elif self.scale > 0: - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - self.attrs['scale'] = self.scale - else: - out_h = self.out_h - out_w = self.out_w - - if self.shape_by_1Dtensor: - self.inputs['OutSize'] = self.out_size - elif self.out_size is not None: - size_tensor = [] - for index, ele in enumerate(self.out_size): - size_tensor.append( - ("x" + str(index), np.ones(1).astype('int32') * ele) - ) - self.inputs['SizeTensor'] = size_tensor - - self.attrs['out_h'] = self.out_h - self.attrs['out_w'] = self.out_w - output_np = bilinear_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - ) - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 5] - self.out_h = 3 - self.out_w = 3 - self.scale = 0.0 - self.out_size = [3, 3] - self.align_corners = True - - -# out_size is a 1-D tensor -class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.out_size = [8, 12] - self.align_corners = True - - -# scale is a 1-D tensor -class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - self.shape_by_1Dtensor = True - - -# scale is a 1-D tensor -class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 2.0 - self.out_size = None - self.align_corners = True - self.scale_by_1Dtensor = True - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_generate_proposals_op.py b/test/legacy_test/test_generate_proposals_op.py deleted file mode 100644 index 901d009effc5bc..00000000000000 --- a/test/legacy_test/test_generate_proposals_op.py +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import math -import unittest - -import numpy as np -from op_test import OpTest -from test_anchor_generator_op import anchor_generator_in_python - -import paddle - - -def generate_proposals_in_python( - scores, - bbox_deltas, - im_info, - anchors, - variances, - pre_nms_topN, - post_nms_topN, - nms_thresh, - min_size, - eta, -): - all_anchors = anchors.reshape(-1, 4) - rois = np.empty((0, 5), dtype=np.float32) - roi_probs = np.empty((0, 1), dtype=np.float32) - - rpn_rois = [] - rpn_roi_probs = [] - rois_num = [] - num_images = scores.shape[0] - for img_idx in range(num_images): - img_i_boxes, img_i_probs = proposal_for_one_image( - im_info[img_idx, :], - all_anchors, - variances, - bbox_deltas[img_idx, :, :, :], - scores[img_idx, :, :, :], - pre_nms_topN, - post_nms_topN, - nms_thresh, - min_size, - eta, - ) - rois_num.append(img_i_probs.shape[0]) - rpn_rois.append(img_i_boxes) - rpn_roi_probs.append(img_i_probs) - - return rpn_rois, rpn_roi_probs, rois_num - - -def proposal_for_one_image( - im_info, - all_anchors, - variances, - bbox_deltas, - scores, - pre_nms_topN, - post_nms_topN, - nms_thresh, - min_size, - eta, -): - # Transpose and reshape predicted bbox transformations to get them - # into the same order as the anchors: - # - bbox deltas will be (4 * A, H, W) format from conv output - # - transpose to (H, W, 4 * A) - # - reshape to (H * W * A, 4) where rows are ordered by (H, W, A) - # in slowest to fastest order to match the enumerated anchors - bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape(-1, 4) - all_anchors = all_anchors.reshape(-1, 4) - variances = variances.reshape(-1, 4) - # Same story for the scores: - # - scores are (A, H, W) format from conv output - # - transpose to (H, W, A) - # - reshape to (H * W * A, 1) where rows are ordered by (H, W, A) - # to match the order of anchors and bbox_deltas - scores = scores.transpose((1, 2, 0)).reshape(-1, 1) - - # sort all (proposal, score) pairs by score from highest to lowest - # take top pre_nms_topN (e.g. 6000) - if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): - order = np.argsort(-scores.squeeze()) - else: - # Avoid sorting possibly large arrays; - # First partition to get top K unsorted - # and then sort just those - inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] - order = np.argsort(-scores[inds].squeeze()) - order = inds[order] - scores = scores[order, :] - bbox_deltas = bbox_deltas[order, :] - all_anchors = all_anchors[order, :] - proposals = box_coder(all_anchors, bbox_deltas, variances) - # clip proposals to image (may result in proposals with zero area - # that will be removed in the next step) - proposals = clip_tiled_boxes(proposals, im_info[:2]) - # remove predicted boxes with height or width < min_size - keep = filter_boxes(proposals, min_size, im_info) - if len(keep) == 0: - proposals = np.zeros((1, 4)).astype('float32') - scores = np.zeros((1, 1)).astype('float32') - return proposals, scores - proposals = proposals[keep, :] - scores = scores[keep, :] - - # apply loose nms (e.g. threshold = 0.7) - # take post_nms_topN (e.g. 
1000) - # return the top proposals - if nms_thresh > 0: - keep = nms( - boxes=proposals, scores=scores, nms_threshold=nms_thresh, eta=eta - ) - if post_nms_topN > 0 and post_nms_topN < len(keep): - keep = keep[:post_nms_topN] - proposals = proposals[keep, :] - scores = scores[keep, :] - - return proposals, scores - - -def box_coder(all_anchors, bbox_deltas, variances, pixel_offset=True): - """ - Decode proposals by anchors and bbox_deltas from RPN - """ - offset = 1 if pixel_offset else 0 - # proposals: xmin, ymin, xmax, ymax - proposals = np.zeros_like(bbox_deltas, dtype=np.float32) - - # anchor_loc: width, height, center_x, center_y - anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32) - - anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + offset - anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + offset - anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0] - anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1] - - # predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height - pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32) - if variances is not None: - for i in range(bbox_deltas.shape[0]): - pred_bbox[i, 0] = ( - variances[i, 0] * bbox_deltas[i, 0] * anchor_loc[i, 0] - + anchor_loc[i, 2] - ) - pred_bbox[i, 1] = ( - variances[i, 1] * bbox_deltas[i, 1] * anchor_loc[i, 1] - + anchor_loc[i, 3] - ) - pred_bbox[i, 2] = ( - math.exp( - min( - variances[i, 2] * bbox_deltas[i, 2], - math.log(1000 / 16.0), - ) - ) - * anchor_loc[i, 0] - ) - pred_bbox[i, 3] = ( - math.exp( - min( - variances[i, 3] * bbox_deltas[i, 3], - math.log(1000 / 16.0), - ) - ) - * anchor_loc[i, 1] - ) - else: - for i in range(bbox_deltas.shape[0]): - pred_bbox[i, 0] = ( - bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[i, 2] - ) - pred_bbox[i, 1] = ( - bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[i, 3] - ) - pred_bbox[i, 2] = ( - math.exp(min(bbox_deltas[i, 2], math.log(1000 / 16.0))) - * anchor_loc[i, 0] - ) - pred_bbox[i, 3] = ( - math.exp(min(bbox_deltas[i, 3], math.log(1000 / 16.0))) - * anchor_loc[i, 1] - ) - proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2 - proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2 - proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - offset - proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - offset - - return proposals - - -def clip_tiled_boxes(boxes, im_shape, pixel_offset=True): - """Clip boxes to image boundaries. im_shape is [height, width] and boxes - has shape (N, 4 * num_tiled_boxes).""" - assert ( - boxes.shape[1] % 4 == 0 - ), f'boxes.shape[1] is {boxes.shape[1]:d}, but must be divisible by 4.' 
- offset = 1 if pixel_offset else 0 - # x1 >= 0 - boxes[:, 0::4] = np.maximum( - np.minimum(boxes[:, 0::4], im_shape[1] - offset), 0 - ) - # y1 >= 0 - boxes[:, 1::4] = np.maximum( - np.minimum(boxes[:, 1::4], im_shape[0] - offset), 0 - ) - # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum( - np.minimum(boxes[:, 2::4], im_shape[1] - offset), 0 - ) - # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum( - np.minimum(boxes[:, 3::4], im_shape[0] - offset), 0 - ) - return boxes - - -def filter_boxes(boxes, min_size, im_info, pixel_offset=True): - """Only keep boxes with both sides >= min_size and center within the image.""" - # Scale min_size to match image scale - im_scale = im_info[2] - min_size = max(min_size, 1.0) - offset = 1 if pixel_offset else 0 - ws = boxes[:, 2] - boxes[:, 0] + offset - hs = boxes[:, 3] - boxes[:, 1] + offset - if pixel_offset: - ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1 - hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1 - x_ctr = boxes[:, 0] + ws / 2.0 - y_ctr = boxes[:, 1] + hs / 2.0 - keep = np.where( - (ws_orig_scale >= min_size) - & (hs_orig_scale >= min_size) - & (x_ctr < im_info[1]) - & (y_ctr < im_info[0]) - )[0] - else: - keep = np.where((ws >= min_size) & (hs >= min_size))[0] - return keep - - -def iou(box_a, box_b, pixel_offset=True): - """ - Apply intersection-over-union overlap between box_a and box_b - """ - xmin_a = min(box_a[0], box_a[2]) - ymin_a = min(box_a[1], box_a[3]) - xmax_a = max(box_a[0], box_a[2]) - ymax_a = max(box_a[1], box_a[3]) - - xmin_b = min(box_b[0], box_b[2]) - ymin_b = min(box_b[1], box_b[3]) - xmax_b = max(box_b[0], box_b[2]) - ymax_b = max(box_b[1], box_b[3]) - offset = 1 if pixel_offset else 0 - area_a = (ymax_a - ymin_a + offset) * (xmax_a - xmin_a + offset) - area_b = (ymax_b - ymin_b + offset) * (xmax_b - xmin_b + offset) - if area_a <= 0 and area_b <= 0: - return 0.0 - - xa = max(xmin_a, xmin_b) - ya = max(ymin_a, ymin_b) - xb = min(xmax_a, xmax_b) - yb = min(ymax_a, ymax_b) - - inter_area = max(xb - xa + offset, 0.0) * max(yb - ya + offset, 0.0) - - iou_ratio = inter_area / (area_a + area_b - inter_area) - - return iou_ratio - - -def nms(boxes, scores, nms_threshold, eta=1.0, pixel_offset=True): - """Apply non-maximum suppression at test time to avoid detecting too many - overlapping bounding boxes for a given object. - Args: - boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. - scores: (tensor) The class predscores for the img, Shape:[num_priors]. - nms_threshold: (float) The overlap thresh for suppressing unnecessary - boxes. - eta: (float) The parameter for adaptive NMS. - Return: - The indices of the kept boxes with respect to num_priors. 
- """ - all_scores = copy.deepcopy(scores) - all_scores = all_scores.flatten() - - sorted_indices = np.argsort(-all_scores, axis=0, kind='mergesort') - sorted_scores = all_scores[sorted_indices] - selected_indices = [] - adaptive_threshold = nms_threshold - for i in range(sorted_scores.shape[0]): - idx = sorted_indices[i] - keep = True - for k in range(len(selected_indices)): - if keep: - kept_idx = selected_indices[k] - overlap = iou( - boxes[idx], boxes[kept_idx], pixel_offset=pixel_offset - ) - keep = True if overlap <= adaptive_threshold else False - else: - break - if keep: - selected_indices.append(idx) - if keep and eta < 1 and adaptive_threshold > 0.5: - adaptive_threshold *= eta - return selected_indices - - -class TestGenerateProposalsOp(OpTest): - def set_data(self): - self.init_test_params() - self.init_test_input() - self.init_test_output() - self.inputs = { - 'Scores': self.scores, - 'BboxDeltas': self.bbox_deltas, - 'ImInfo': self.im_info.astype(np.float32), - 'Anchors': self.anchors, - 'Variances': self.variances, - } - - self.attrs = { - 'pre_nms_topN': self.pre_nms_topN, - 'post_nms_topN': self.post_nms_topN, - 'nms_thresh': self.nms_thresh, - 'min_size': self.min_size, - 'eta': self.eta, - } - - self.outputs = { - 'RpnRois': (self.rpn_rois[0], [self.rois_num]), - 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]), - } - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - def setUp(self): - self.op_type = "generate_proposals" - self.set_data() - - def init_test_params(self): - self.pre_nms_topN = 12000 # train 12000, test 2000 - self.post_nms_topN = 5000 # train 6000, test 1000 - self.nms_thresh = 0.7 - self.min_size = 3.0 - self.eta = 1.0 - - def init_test_input(self): - batch_size = 1 - input_channels = 20 - layer_h = 16 - layer_w = 16 - input_feat = np.random.random( - (batch_size, input_channels, layer_h, layer_w) - ).astype('float32') - self.anchors, self.variances = anchor_generator_in_python( - input_feat=input_feat, - anchor_sizes=[16.0, 32.0], - aspect_ratios=[0.5, 1.0], - variances=[1.0, 1.0, 1.0, 1.0], - stride=[16.0, 16.0], - offset=0.5, - ) - self.im_info = np.array( - [[64.0, 64.0, 8.0]] - ) # im_height, im_width, scale - num_anchors = self.anchors.shape[2] - self.scores = np.random.random( - (batch_size, num_anchors, layer_h, layer_w) - ).astype('float32') - self.bbox_deltas = np.random.random( - (batch_size, num_anchors * 4, layer_h, layer_w) - ).astype('float32') - - def init_test_output(self): - ( - self.rpn_rois, - self.rpn_roi_probs, - self.rois_num, - ) = generate_proposals_in_python( - self.scores, - self.bbox_deltas, - self.im_info, - self.anchors, - self.variances, - self.pre_nms_topN, - self.post_nms_topN, - self.nms_thresh, - self.min_size, - self.eta, - ) - - -class TestGenerateProposalsOutLodOp(TestGenerateProposalsOp): - def set_data(self): - self.init_test_params() - self.init_test_input() - self.init_test_output() - self.inputs = { - 'Scores': self.scores, - 'BboxDeltas': self.bbox_deltas, - 'ImInfo': self.im_info.astype(np.float32), - 'Anchors': self.anchors, - 'Variances': self.variances, - } - - self.attrs = { - 'pre_nms_topN': self.pre_nms_topN, - 'post_nms_topN': self.post_nms_topN, - 'nms_thresh': self.nms_thresh, - 'min_size': self.min_size, - 'eta': self.eta, - 'return_rois_num': True, - } - - self.outputs = { - 'RpnRois': (self.rpn_rois[0], [self.rois_num]), - 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]), - 'RpnRoisNum': 
(np.asarray(self.rois_num, dtype=np.int32)), - } - - -class TestGenerateProposalsOpNoBoxLeft(TestGenerateProposalsOp): - def init_test_params(self): - self.pre_nms_topN = 12000 # train 12000, test 2000 - self.post_nms_topN = 5000 # train 6000, test 1000 - self.nms_thresh = 0.7 - self.min_size = 1000.0 - self.eta = 1.0 - - -if __name__ == '__main__': - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/test_generate_proposals_v2_op.py b/test/legacy_test/test_generate_proposals_v2_op.py index 87e9e6c60fe7d6..b0eaf05ea6a753 100644 --- a/test/legacy_test/test_generate_proposals_v2_op.py +++ b/test/legacy_test/test_generate_proposals_v2_op.py @@ -12,16 +12,182 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import math import unittest import numpy as np from op_test import OpTest from test_anchor_generator_op import anchor_generator_in_python -from test_generate_proposals_op import box_coder, clip_tiled_boxes, nms import paddle +def box_coder(all_anchors, bbox_deltas, variances, pixel_offset=True): + """ + Decode proposals by anchors and bbox_deltas from RPN + """ + offset = 1 if pixel_offset else 0 + # proposals: xmin, ymin, xmax, ymax + proposals = np.zeros_like(bbox_deltas, dtype=np.float32) + + # anchor_loc: width, height, center_x, center_y + anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32) + + anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + offset + anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + offset + anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0] + anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1] + + # predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height + pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32) + if variances is not None: + for i in range(bbox_deltas.shape[0]): + pred_bbox[i, 0] = ( + variances[i, 0] * bbox_deltas[i, 0] * anchor_loc[i, 0] + + anchor_loc[i, 2] + ) + pred_bbox[i, 1] = ( + variances[i, 1] * bbox_deltas[i, 1] * anchor_loc[i, 1] + + anchor_loc[i, 3] + ) + pred_bbox[i, 2] = ( + math.exp( + min( + variances[i, 2] * bbox_deltas[i, 2], + math.log(1000 / 16.0), + ) + ) + * anchor_loc[i, 0] + ) + pred_bbox[i, 3] = ( + math.exp( + min( + variances[i, 3] * bbox_deltas[i, 3], + math.log(1000 / 16.0), + ) + ) + * anchor_loc[i, 1] + ) + else: + for i in range(bbox_deltas.shape[0]): + pred_bbox[i, 0] = ( + bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[i, 2] + ) + pred_bbox[i, 1] = ( + bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[i, 3] + ) + pred_bbox[i, 2] = ( + math.exp(min(bbox_deltas[i, 2], math.log(1000 / 16.0))) + * anchor_loc[i, 0] + ) + pred_bbox[i, 3] = ( + math.exp(min(bbox_deltas[i, 3], math.log(1000 / 16.0))) + * anchor_loc[i, 1] + ) + proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2 + proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2 + proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - offset + proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - offset + + return proposals + + +def clip_tiled_boxes(boxes, im_shape, pixel_offset=True): + """Clip boxes to image boundaries. im_shape is [height, width] and boxes + has shape (N, 4 * num_tiled_boxes).""" + assert ( + boxes.shape[1] % 4 == 0 + ), f'boxes.shape[1] is {boxes.shape[1]:d}, but must be divisible by 4.' 
+ offset = 1 if pixel_offset else 0 + # x1 >= 0 + boxes[:, 0::4] = np.maximum( + np.minimum(boxes[:, 0::4], im_shape[1] - offset), 0 + ) + # y1 >= 0 + boxes[:, 1::4] = np.maximum( + np.minimum(boxes[:, 1::4], im_shape[0] - offset), 0 + ) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum( + np.minimum(boxes[:, 2::4], im_shape[1] - offset), 0 + ) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum( + np.minimum(boxes[:, 3::4], im_shape[0] - offset), 0 + ) + return boxes + + +def iou(box_a, box_b, pixel_offset=True): + """ + Apply intersection-over-union overlap between box_a and box_b + """ + xmin_a = min(box_a[0], box_a[2]) + ymin_a = min(box_a[1], box_a[3]) + xmax_a = max(box_a[0], box_a[2]) + ymax_a = max(box_a[1], box_a[3]) + + xmin_b = min(box_b[0], box_b[2]) + ymin_b = min(box_b[1], box_b[3]) + xmax_b = max(box_b[0], box_b[2]) + ymax_b = max(box_b[1], box_b[3]) + offset = 1 if pixel_offset else 0 + area_a = (ymax_a - ymin_a + offset) * (xmax_a - xmin_a + offset) + area_b = (ymax_b - ymin_b + offset) * (xmax_b - xmin_b + offset) + if area_a <= 0 and area_b <= 0: + return 0.0 + + xa = max(xmin_a, xmin_b) + ya = max(ymin_a, ymin_b) + xb = min(xmax_a, xmax_b) + yb = min(ymax_a, ymax_b) + + inter_area = max(xb - xa + offset, 0.0) * max(yb - ya + offset, 0.0) + + iou_ratio = inter_area / (area_a + area_b - inter_area) + + return iou_ratio + + +def nms(boxes, scores, nms_threshold, eta=1.0, pixel_offset=True): + """Apply non-maximum suppression at test time to avoid detecting too many + overlapping bounding boxes for a given object. + Args: + boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. + scores: (tensor) The class predscores for the img, Shape:[num_priors]. + nms_threshold: (float) The overlap thresh for suppressing unnecessary + boxes. + eta: (float) The parameter for adaptive NMS. + Return: + The indices of the kept boxes with respect to num_priors. + """ + all_scores = copy.deepcopy(scores) + all_scores = all_scores.flatten() + + sorted_indices = np.argsort(-all_scores, axis=0, kind='mergesort') + sorted_scores = all_scores[sorted_indices] + selected_indices = [] + adaptive_threshold = nms_threshold + for i in range(sorted_scores.shape[0]): + idx = sorted_indices[i] + keep = True + for k in range(len(selected_indices)): + if keep: + kept_idx = selected_indices[k] + overlap = iou( + boxes[idx], boxes[kept_idx], pixel_offset=pixel_offset + ) + keep = True if overlap <= adaptive_threshold else False + else: + break + if keep: + selected_indices.append(idx) + if keep and eta < 1 and adaptive_threshold > 0.5: + adaptive_threshold *= eta + return selected_indices + + def python_generate_proposals_v2( scores, bbox_deltas, diff --git a/test/legacy_test/test_linear_interp_op.py b/test/legacy_test/test_linear_interp_op.py deleted file mode 100755 index f5bd1e7e103d10..00000000000000 --- a/test/legacy_test/test_linear_interp_op.py +++ /dev/null @@ -1,381 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import platform -import unittest - -import numpy as np -from op_test import OpTest - -import paddle -from paddle import base -from paddle.base import core -from paddle.pir_utils import test_with_pir_api - - -def linear_interp_np( - input, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - align_mode=0, - data_layout='NCHW', -): - if data_layout == "NHWC": - input = np.transpose(input, (0, 2, 1)) # NHWC => NCHW - if out_size is not None: - out_w = out_size[0] - if actual_shape is not None: - out_w = actual_shape[0] - batch_size, channel, in_w = input.shape - - ratio_w = 0.0 - if out_w > 1: - if align_corners: - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((batch_size, channel, out_w)) - - for j in range(out_w): - if align_mode == 0 and not align_corners: - w = int(ratio_w * (j + 0.5) - 0.5) - else: - w = int(ratio_w * j) - w = max(0, w) - wid = 1 if w < in_w - 1 else 0 - - if align_mode == 0 and not align_corners: - idx_src_w = max(ratio_w * (j + 0.5) - 0.5, 0) - w1lambda = idx_src_w - w - else: - w1lambda = ratio_w * j - w - w2lambda = 1.0 - w1lambda - - out[:, :, j] = ( - w2lambda * input[:, :, w] + w1lambda * input[:, :, w + wid] - ) - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 1)) # NCHW => NHWC - - return out.astype(input.dtype) - - -class TestLinearInterpOp(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCHW' - self.init_test_case() - self.op_type = "linear_interp" - input_np = np.random.random(self.input_shape).astype("float64") - - if self.data_layout == "NCHW": - in_w = self.input_shape[2] - else: - in_w = self.input_shape[1] - - if self.scale > 0: - out_w = int(in_w * self.scale) - else: - out_w = self.out_w - - output_np = linear_interp_np( - input_np, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - self.data_layout, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - - self.attrs = { - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - 'data_layout': self.data_layout, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - if platform.system() == "Linux": - self.check_output(atol=1e-7, check_dygraph=False) - else: - self.check_output(atol=1e-5, check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'linear' - self.input_shape = [1, 3, 100] - self.out_w = 50 - self.scale = 0.0 - self.out_size = np.array( - [ - 50, - ] - ).astype("int32") - self.align_corners = False - self.align_mode = 1 - - -class TestLinearInterpOpDataLayout(TestLinearInterpOp): - def init_test_case(self): - self.interp_method = 'linear' - self.input_shape = [1, 3, 100] - self.out_w = 50 - self.scale = 0.0 - self.out_size = np.array( - [ - 50, - ] - ).astype("int32") - self.align_corners = False - self.align_mode = 1 - self.data_layout = 'NHWC' - - -class TestLinearInterpOpAlignMode(TestLinearInterpOp): - def init_test_case(self): - self.interp_method = 'linear' - self.input_shape = [1, 3, 100] - self.out_w = 50 - self.scale = 0.0 - self.out_size = np.array( - [ - 50, - ] - ).astype("int32") - self.align_corners = False - self.align_mode = 0 - - 
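The deleted 1-D cases above all reduce to the linear_interp_np reference defined at the top of this file; align_corners only changes how the ratio between input and output widths is computed. A small sanity check reusing that helper (a sketch for illustration, assuming the helper as defined in the removed file; not part of the patch):

    import numpy as np

    # align_corners=True maps input endpoints onto output endpoints, so
    # ratio_w = (in_w - 1) / (out_w - 1); otherwise ratio_w = in_w / out_w.
    x = np.arange(4, dtype="float64").reshape(1, 1, 4)  # NCW layout, in_w = 4
    out = linear_interp_np(x, out_w=7, align_corners=True)
    assert out.shape == (1, 1, 7)
    # Endpoints of the input are preserved under align_corners=True.
    assert out[0, 0, 0] == x[0, 0, 0] and out[0, 0, -1] == x[0, 0, -1]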
-class TestLinearInterpOpScale(TestLinearInterpOp): - def init_test_case(self): - self.interp_method = 'linear' - self.input_shape = [1, 3, 100] - self.out_w = 50 - self.scale = 0.5 - self.out_size = np.array( - [ - 50, - ] - ).astype("int32") - self.align_corners = False - self.align_mode = 0 - - -class TestLinearInterpOpSizeTensor(TestLinearInterpOp): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCHW' - self.init_test_case() - self.op_type = "linear_interp" - input_np = np.random.random(self.input_shape).astype("float64") - self.shape_by_1Dtensor = False - self.scale_by_1Dtensor = False - - if self.data_layout == "NCHW": - in_w = self.input_shape[2] - else: - in_w = self.input_shape[1] - - if self.scale > 0: - out_w = int(in_w * self.scale) - else: - out_w = self.out_w - - output_np = linear_interp_np( - input_np, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - self.data_layout, - ) - - self.inputs = {'X': input_np} - if self.out_size is not None and self.shape_by_1Dtensor: - self.inputs['OutSize'] = self.out_size - elif self.actual_shape is not None and self.shape_by_1Dtensor: - self.inputs['OutSize'] = self.actual_shape - else: - size_tensor = [] - for index, ele in enumerate(self.out_size): - size_tensor.append( - ("x" + str(index), np.ones(1).astype('int32') * ele) - ) - self.inputs['SizeTensor'] = size_tensor - - self.attrs = { - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - 'data_layout': self.data_layout, - } - self.outputs = {'Out': output_np} - - -class TestLinearInterpOpAPI2_0(unittest.TestCase): - def test_case(self): - # dygraph - x_data = np.random.random((1, 3, 128)).astype("float32") - us_1 = paddle.nn.Upsample( - size=[ - 64, - ], - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW', - ) - with base.dygraph.guard(): - x = paddle.to_tensor(x_data) - interp = us_1(x) - - expect = linear_interp_np( - x_data, out_w=64, align_mode=1, align_corners=False - ) - - np.testing.assert_allclose(interp.numpy(), expect, rtol=1e-05) - - -class TestResizeLinearOpUint8(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "linear_interp" - input_np = np.random.random(self.input_shape).astype("uint8") - - if self.scale > 0: - out_w = int(self.input_shape[3] * self.scale) - else: - out_w = self.out_w - - output_np = linear_interp_np( - input_np, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - - self.attrs = { - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - if platform.system() == "Linux": - self.check_output_with_place( - place=core.CPUPlace(), atol=1e-7, check_dygraph=False - ) - else: - self.check_output_with_place( - place=core.CPUPlace(), atol=1e-5, check_dygraph=False - ) - - def init_test_case(self): - self.interp_method = 'linear' - self.input_shape = [2, 3, 100] - self.out_w = 50 - self.scale = 0.0 - self.out_size = np.array( - [ - 50, - ] - ).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestLinearInterpOpError(unittest.TestCase): - 
@test_with_pir_api - def test_error(self): - paddle.enable_static() - with paddle.static.program_guard( - paddle.static.Program(), paddle.static.Program() - ): - - def input_shape_error(): - x1 = paddle.static.data(name="x1", shape=[1], dtype="float32") - out1 = paddle.nn.Upsample( - size=[ - 256, - ], - data_format='NCW', - mode='linear', - ) - out1_res = out1(x1) - - def data_format_error(): - x2 = paddle.static.data( - name="x2", shape=[1, 3, 128], dtype="float32" - ) - out2 = paddle.nn.Upsample( - size=[ - 256, - ], - data_format='NHWCD', - mode='linear', - ) - out2_res = out2(x2) - - def out_shape_error(): - x3 = paddle.static.data( - name="x3", shape=[1, 3, 128], dtype="float32" - ) - out3 = paddle.nn.Upsample( - size=[ - 256, - 256, - ], - data_format='NHWC', - mode='linear', - ) - out3_res = out3(x3) - - self.assertRaises(ValueError, input_shape_error) - self.assertRaises(ValueError, data_format_error) - self.assertRaises(ValueError, out_shape_error) - paddle.disable_static() - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_lookup_table_op.py b/test/legacy_test/test_lookup_table_op.py deleted file mode 100644 index 42c9844ddd9853..00000000000000 --- a/test/legacy_test/test_lookup_table_op.py +++ /dev/null @@ -1,437 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op import Operator -from op_test import ( - OpTest, - check_out_dtype, - skip_check_grad_ci, -) - -import paddle.nn.functional as F -from paddle.base import core - - -class TestLookupTableOp(OpTest): - def setUp(self): - self.op_type = "lookup_table" - table = np.random.random((17, 31)).astype("float64") - ids = np.random.randint(0, 17, 4).astype("int64") - ids_expand = np.expand_dims(ids, axis=1) - self.inputs = {'W': table, 'Ids': ids_expand} - self.outputs = {'Out': table[ids]} - - def test_check_output(self): - self.check_output(check_cinn=True) - - def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True) - - -class TestLookupTableOpWithTensorIds(OpTest): - def setUp(self): - self.op_type = "lookup_table" - table = np.random.random((17, 31)).astype("float64") - ids = np.random.randint(low=0, high=17, size=(2, 4, 5, 1)).astype( - "int64" - ) - self.inputs = {'W': table, 'Ids': ids} - self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} - - def test_check_output(self): - self.check_output(check_cinn=True) - - def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True) - - -@skip_check_grad_ci( - reason="Since paddings are not trainable and fixed in forward," - "the gradient of paddings makes no sense and we don't " - "test the gradient here." 
-) -class TestLookupTableOpWithPadding(TestLookupTableOp): - def test_check_output(self): - ids = np.squeeze(self.inputs['Ids']) - padding_idx = np.random.choice(ids, 1)[0] - self.outputs['Out'][ids == padding_idx] = np.zeros(31) - self.attrs = {'padding_idx': int(padding_idx)} - self.check_output(check_cinn=True) - - -@skip_check_grad_ci( - reason="Since paddings are not trainable and fixed in forward," - "the gradient of paddings makes no sense and we don't " - "test the gradient here." -) -class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): - def test_check_output(self): - ids = self.inputs['Ids'] - flatten_idx = ids.flatten() - padding_idx = np.random.choice(flatten_idx, 1)[0] - self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) - self.attrs = {'padding_idx': padding_idx} - self.check_output(check_cinn=True) - - -class TestLookupTableWIsSelectedRows(unittest.TestCase): - def prepare_ids(self, scope, place): - ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.array([[0], [4], [3], [5]]).astype("int64") - ids_tensor.set(ids_array, place) - return ids_array - - def prepare_w(self, scope, place): - rows = [0, 1, 2, 3, 4, 5, 6] - row_numel = 12 - - w_selected_rows = scope.var('W').get_selected_rows() - w_selected_rows.set_height(len(rows)) - w_selected_rows.set_rows(rows) - w_array = np.ones((len(rows), row_numel)).astype("float32") - for i in range(len(rows)): - w_array[i] *= i - w_tensor = w_selected_rows.get_tensor() - w_tensor.set(w_array, place) - - def create_out_tensor(self, scope, place): - return scope.var('Out').get_tensor() - - def check_result(self, ids_array, result_array): - # all(): return True if all elements of the iterable are true (or if the iterable is empty) - for idx, row in enumerate(ids_array): - assert (row[0] == result_array[idx]).all() - - def check_with_place(self, place): - scope = core.Scope() - - ids_array = self.prepare_ids(scope, place) - - self.prepare_w(scope, place) - - out_tensor = self.create_out_tensor(scope, place) - - # create and run lookup_table operator - lookup_table = Operator("lookup_table", W='W', Ids='Ids', Out='Out') - lookup_table.run(scope, place) - - # get result from Out - result_array = np.array(out_tensor) - - self.check_result(ids_array, result_array) - - def test_w_is_selected_rows(self): - places = [core.CPUPlace()] - # currently only support CPU - for place in places: - self.check_with_place(place) - - -class TestLookupTableWithTensorIdsWIsSelectedRows( - TestLookupTableWIsSelectedRows -): - def prepare_ids(self, scope, place): - ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint(low=0, high=6, size=(2, 4, 3, 1)).astype( - "int64" - ) - ids_tensor.set(ids_array, place) - return ids_array - - def check_result(self, ids_array, result_array): - for idx, row in np.ndenumerate(ids_array): - assert (row == result_array[idx]).all() - - -class TestLookupTableOpInt8(OpTest): - def setUp(self): - self.op_type = "lookup_table" - table = np.random.randint(low=-128, high=127, size=(17, 31)).astype( - "int8" - ) - ids = np.random.randint(0, 17, 4).astype("int64") - ids_expand = np.expand_dims(ids, axis=1) - self.inputs = {'W': table, 'Ids': ids_expand} - self.outputs = {'Out': table[ids]} - - def test_check_output(self): - self.check_output(check_cinn=True) - - def test_check_grad(self): - # since int8 type only be used in test and inference, there is - # no gradient implement, so we don't need to test it - pass - - -class TestLookupTableOpWithTensorIdsInt8(OpTest): - 
def setUp(self): - self.op_type = "lookup_table" - table = np.random.randint(low=-128, high=127, size=(17, 31)).astype( - "int8" - ) - ids = np.random.randint(low=0, high=17, size=(2, 4, 5, 1)).astype( - "int64" - ) - self.inputs = {'W': table, 'Ids': ids} - self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} - - def test_check_output(self): - self.check_output(check_cinn=True) - - def test_check_grad(self): - # since int8 type only be used in test and inference, there is - # no gradient implement, so we don't need to test it - pass - - -class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8): - def test_check_output(self): - ids = np.squeeze(self.inputs['Ids']) - padding_idx = np.random.choice(ids, 1)[0] - self.outputs['Out'][ids == padding_idx] = np.zeros(31) - self.attrs = {'padding_idx': int(padding_idx)} - self.check_output(check_cinn=True) - - def test_check_grad(self): - # Since paddings are not trainable and fixed in forward, the gradient of - # paddings makes no sense and we don't test the gradient here. - pass - - -class TestLookupTableOpWithTensorIdsAndPaddingInt8( - TestLookupTableOpWithTensorIdsInt8 -): - def test_check_output(self): - ids = self.inputs['Ids'] - flatten_idx = ids.flatten() - padding_idx = np.random.choice(flatten_idx, 1)[0] - self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) - self.attrs = {'padding_idx': padding_idx} - self.check_output(check_cinn=True) - - def test_check_grad(self): - # Since paddings are not trainable and fixed in forward, the gradient of - # paddings makes no sense and we don't test the gradient here. - pass - - -class TestLookupTableWIsSelectedRowsInt8(unittest.TestCase): - def prepare_ids(self, scope, place): - ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.array([[0], [4], [3], [5]]).astype("int64") - ids_tensor.set(ids_array, place) - return ids_array - - def prepare_w(self, scope, place): - rows = [0, 1, 2, 3, 4, 5, 6] - row_numel = 12 - - w_selected_rows = scope.var('W').get_selected_rows() - w_selected_rows.set_height(len(rows)) - w_selected_rows.set_rows(rows) - w_array = np.ones((len(rows), row_numel)).astype("int8") - for i in range(len(rows)): - w_array[i] *= i - w_tensor = w_selected_rows.get_tensor() - w_tensor.set(w_array, place) - - def create_out_tensor(self, scope, place): - return scope.var('Out').get_tensor() - - def check_result(self, ids_array, result_array): - # all(): return True if all elements of the iterable are true (or if the iterable is empty) - for idx, row in enumerate(ids_array): - assert (row[0] == result_array[idx]).all() - - def check_with_place(self, place): - scope = core.Scope() - - ids_array = self.prepare_ids(scope, place) - - self.prepare_w(scope, place) - - out_tensor = self.create_out_tensor(scope, place) - - # create and run lookup_table operator - lookup_table = Operator("lookup_table", W='W', Ids='Ids', Out='Out') - lookup_table.run(scope, place) - - # get result from Out - result_array = np.array(out_tensor) - - self.check_result(ids_array, result_array) - - def test_w_is_selected_rows(self): - places = [core.CPUPlace()] - # currently only support CPU - for place in places: - self.check_with_place(place) - - -class TestLookupTableWithTensorIdsWIsSelectedRowsInt8( - TestLookupTableWIsSelectedRowsInt8 -): - def prepare_ids(self, scope, place): - ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint(low=0, high=6, size=(2, 4, 3, 1)).astype( - "int64" - ) - ids_tensor.set(ids_array, place) - return ids_array - - def 
check_result(self, ids_array, result_array): - for idx, row in np.ndenumerate(ids_array): - assert (row == result_array[idx]).all() - - -@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") -class TestLookupTableOpInt16(OpTest): - def setUp(self): - self.op_type = "lookup_table" - table = np.random.randint(low=-128, high=127, size=(17, 31)).astype( - "int16" - ) - ids = np.random.randint(0, 17, 4).astype("int64") - ids_expand = np.expand_dims(ids, axis=1) - self.inputs = {'W': table, 'Ids': ids_expand} - self.outputs = {'Out': table[ids]} - - def test_check_output(self): - self.check_output(check_cinn=True) - - -@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") -class TestLookupTableOpWithTensorIdsInt16(OpTest): - def setUp(self): - self.op_type = "lookup_table" - table = np.random.randint(low=-128, high=127, size=(17, 31)).astype( - "int16" - ) - ids = np.random.randint(low=0, high=17, size=(2, 4, 5, 1)).astype( - "int64" - ) - self.inputs = {'W': table, 'Ids': ids} - self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} - - def test_check_output(self): - self.check_output(check_cinn=True) - - -@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") -class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16): - def test_check_output(self): - ids = np.squeeze(self.inputs['Ids']) - padding_idx = np.random.choice(ids, 1)[0] - self.outputs['Out'][ids == padding_idx] = np.zeros(31) - self.attrs = {'padding_idx': int(padding_idx)} - self.check_output(check_cinn=True) - - -@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") -class TestLookupTableOpWithTensorIdsAndPaddingInt16( - TestLookupTableOpWithTensorIdsInt16 -): - def test_check_output(self): - ids = self.inputs['Ids'] - flatten_idx = ids.flatten() - padding_idx = np.random.choice(flatten_idx, 1)[0] - self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) - self.attrs = {'padding_idx': padding_idx} - self.check_output(check_cinn=True) - - -class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase): - def prepare_ids(self, scope, place): - ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.array([[0], [4], [3], [5]]).astype("int64") - ids_tensor.set(ids_array, place) - return ids_array - - def prepare_w(self, scope, place): - rows = [0, 1, 2, 3, 4, 5, 6] - row_numel = 12 - - w_selected_rows = scope.var('W').get_selected_rows() - w_selected_rows.set_height(len(rows)) - w_selected_rows.set_rows(rows) - w_array = np.ones((len(rows), row_numel)).astype("int16") - for i in range(len(rows)): - w_array[i] *= i - w_tensor = w_selected_rows.get_tensor() - w_tensor.set(w_array, place) - - def create_out_tensor(self, scope, place): - return scope.var('Out').get_tensor() - - def check_result(self, ids_array, result_array): - for idx, row in enumerate(ids_array): - assert (row[0] == result_array[idx]).all() - - def check_with_place(self, place): - scope = core.Scope() - - ids_array = self.prepare_ids(scope, place) - - self.prepare_w(scope, place) - - out_tensor = self.create_out_tensor(scope, place) - - # create and run lookup_table operator - lookup_table = Operator("lookup_table", W='W', Ids='Ids', Out='Out') - lookup_table.run(scope, place) - - # get result from Out - result_array = np.array(out_tensor) - - self.check_result(ids_array, result_array) - - def test_w_is_selected_rows(self): - places = [core.CPUPlace()] - # currently only support CPU - for place in places: - self.check_with_place(place) - - -class 
TestLookupTableWithTensorIdsWIsSelectedRowsInt16( - TestLookupTableWIsSelectedRowsInt16 -): - def prepare_ids(self, scope, place): - ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint(low=0, high=6, size=(2, 4, 3, 1)).astype( - "int64" - ) - ids_tensor.set(ids_array, place) - return ids_array - - def check_result(self, ids_array, result_array): - for idx, row in np.ndenumerate(ids_array): - assert (row == result_array[idx]).all() - - -class TestOutDtype(unittest.TestCase): - def test_dtype(self): - api_fn = F.embedding - check_out_dtype( - api_fn, - in_specs=[([10, 16], 'int64'), ([100, 64],)], - expect_dtypes=['float32', 'float64'], - target_index=1, - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_matmul_op.py b/test/legacy_test/test_matmul_op.py deleted file mode 100644 index 2d264bff97c308..00000000000000 --- a/test/legacy_test/test_matmul_op.py +++ /dev/null @@ -1,249 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest, paddle_static_guard - -import paddle -from paddle import base -from paddle.pir_utils import test_with_pir_api - - -def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y): - BATCH_SIZE = 2 - M = 3 - N = 4 - K = 5 - if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y): - K = 1 - if dim_X == 1: - if transpose_X: - shape_X = [M] - else: - shape_X = [K] - if dim_Y == 1: - if transpose_Y: - shape_Y = [N] - else: - shape_Y = [K] - if dim_X >= 2: - if transpose_X: - shape_X = [K, M] - else: - shape_X = [M, K] - if dim_X == 3: - shape_X = [BATCH_SIZE] + shape_X - if dim_Y >= 2: - if transpose_Y: - shape_Y = [N, K] - else: - shape_Y = [K, N] - if dim_Y == 3: - shape_Y = [BATCH_SIZE] + shape_Y - return shape_X, shape_Y - - -def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): - """Reference forward implementation using np.matmul.""" - # np.matmul does not support the transpose flags, so we manually - # transpose X and Y appropriately. 
- if transpose_X: - if X.ndim == 1: - X = X.reshape((X.size, 1)) - elif X.ndim == 2: - X = X.T - else: - dim = list(range(len(X.shape))) - dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1] - X = np.transpose(X, tuple(dim)) - if transpose_Y: - if Y.ndim == 1: - Y = Y.reshape((1, Y.size)) - else: - dim = list(range(len(Y.shape))) - dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1] - Y = np.transpose(Y, tuple(dim)) - - Out = np.matmul(X, Y) - return Out - - -class Generator: - def setUp(self): - self.op_type = "matmul" - X = np.random.random(self.shape_X).astype("float32") - Y = np.random.random(self.shape_Y).astype("float32") - Out = reference_matmul(X, Y, self.transpose_X, self.transpose_Y) - self.inputs = {'X': X, 'Y': Y} - self.attrs = { - 'transpose_X': self.transpose_X, - 'transpose_Y': self.transpose_Y, - } - self.outputs = {'Out': Out} - - def test_check_output(self): - self.check_output(check_cinn=True) - - def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - max_relative_error=1e-3, - check_cinn=True, - ) - - def test_check_grad_ignore_x(self): - self.check_grad( - ['Y'], - 'Out', - max_relative_error=1e-3, - no_grad_set=set("X"), - check_cinn=True, - ) - - def test_check_grad_ignore_y(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=1e-3, - no_grad_set=set('Y'), - check_cinn=True, - ) - - -# Test case n-dim -def generate_compatible_shapes_ndim(dim, transpose_X, transpose_Y): - M = 2 - N = 4 - K = 3 - shape_X = [2 for _ in range(dim - 2)] - shape_Y = [2 for _ in range(dim - 2)] - - if transpose_X: - shape_X += [K, M] - else: - shape_X += [M, K] - - if transpose_Y: - shape_Y += [N, K] - else: - shape_Y += [K, N] - - return shape_X, shape_Y - - -# # Test case n-dim -for dim in [4]: - for transpose_X in [False, True]: - for transpose_Y in [False, True]: - test_name = f'TestMatMulOp_dimX_{dim}_dim_Y_{dim}_transX_{transpose_X}_transY_{transpose_Y}' - shape_X, shape_Y = generate_compatible_shapes_ndim( - dim, transpose_X, transpose_Y - ) - globals()[test_name] = type( - test_name, - (Generator, OpTest), - { - 'shape_X': shape_X, - 'shape_Y': shape_Y, - 'transpose_X': transpose_X, - 'transpose_Y': transpose_Y, - }, - ) - - -class API_TestMm(unittest.TestCase): - @test_with_pir_api - def test_out(self): - with paddle_static_guard(): - with paddle.base.program_guard(paddle.base.Program()): - x = paddle.static.data(name="x", shape=[2], dtype="float64") - y = paddle.static.data(name='y', shape=[2], dtype='float64') - result = paddle.mm(x, y) - exe = base.Executor(base.CPUPlace()) - data1 = np.random.rand(2) - data2 = np.random.rand(2) - np_res = exe.run( - feed={'x': data1, 'y': data2}, fetch_list=[result] - ) - expected_result = np.matmul(data1, data2) - - np.testing.assert_allclose( - np_res, - expected_result, - rtol=1e-05, - atol=1e-05, - err_msg=f'two value is {np_res}\n{expected_result}, check diff!', - ) - - def test_dygraph_without_out(self): - device = base.CPUPlace() - with base.dygraph.guard(device): - input_array1 = np.random.rand(3, 4).astype("float64") - input_array2 = np.random.rand(4, 3).astype("float64") - data1 = paddle.to_tensor(input_array1) - data2 = paddle.to_tensor(input_array2) - out = paddle.mm(data1, data2) - expected_result = np.matmul(input_array1, input_array2) - np.testing.assert_allclose(expected_result, out.numpy(), rtol=1e-05) - - -class Test_API_Matmul(unittest.TestCase): - def test_dygraph_without_out(self): - device = base.CPUPlace() - with base.dygraph.guard(device): - input_array1 = 
np.random.rand(3, 4).astype("float64") - input_array2 = np.random.rand(4, 3).astype("float64") - data1 = paddle.to_tensor(input_array1) - data2 = paddle.to_tensor(input_array2) - out = paddle.matmul(data1, data2) - expected_result = np.matmul(input_array1, input_array2) - np.testing.assert_allclose(expected_result, out.numpy(), rtol=1e-05) - - -class API_TestMmError(unittest.TestCase): - @test_with_pir_api - def test_errors(self): - paddle.enable_static() - with paddle.static.program_guard( - paddle.static.Program(), paddle.static.Program() - ): - - def test_error1(): - data1 = paddle.static.data( - name="data1", shape=[10, 2], dtype="float32" - ) - data2 = paddle.static.data( - name="data2", shape=[3, 10], dtype="float32" - ) - paddle.mm(data1, data2) - - self.assertRaises(ValueError, test_error1) - - def test_error2(): - data3 = paddle.static.data( - name="data3", shape=[10, 10, 2], dtype="float32" - ) - data4 = paddle.static.data( - name="data4", shape=[3, 2, 10], dtype="float32" - ) - paddle.mm(data3, data4) - - self.assertRaises(ValueError, test_error2) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_nearest_interp_op.py b/test/legacy_test/test_nearest_interp_op.py deleted file mode 100755 index 3b09cab3eacee0..00000000000000 --- a/test/legacy_test/test_nearest_interp_op.py +++ /dev/null @@ -1,471 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from op_test import OpTest - -from paddle.base import core - - -def nearest_neighbor_interp_np( - X, - out_h, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - data_layout='NCHW', -): - """nearest neighbor interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - X = np.transpose(X, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - n, c, in_h, in_w = X.shape - - ratio_h = ratio_w = 0.0 - if out_h > 1: - if align_corners: - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 1.0 * in_h / out_h - if out_w > 1: - if align_corners: - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((n, c, out_h, out_w)) - - if align_corners: - for i in range(out_h): - in_i = int(ratio_h * i + 0.5) - for j in range(out_w): - in_j = int(ratio_w * j + 0.5) - out[:, :, i, j] = X[:, :, in_i, in_j] - else: - for i in range(out_h): - in_i = int(ratio_h * i) - for j in range(out_w): - in_j = int(ratio_w * j) - out[:, :, i, j] = X[:, :, in_i, in_j] - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - - return out.astype(X.dtype) - - -class TestNearestInterpOp(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCHW' - self.init_test_case() - self.op_type = "nearest_interp" - input_np = np.random.random(self.input_shape).astype("float64") - - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = nearest_neighbor_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.data_layout, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - self.attrs = { - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'data_layout': self.data_layout, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [2, 3, 4, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpCase1(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.align_corners = True - - -class TestNearestNeighborInterpCase2(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.align_corners = True - - -class TestNearestNeighborInterpCase3(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - 
self.scale = 0.0 - self.align_corners = True - - -class TestNearestNeighborInterpCase4(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.out_size = np.array([2, 2]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpCase5(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.out_size = np.array([11, 11]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpCase6(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([65, 129]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpSame(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0.0 - self.align_corners = True - - -class TestNearestNeighborInterpActualShape(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [2, 4, 4, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 8]).astype("int32") - self.align_corners = True - self.data_layout = "NHWC" - - -class TestNearestInterpOpUint8(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "nearest_interp" - input_np = np.random.randint( - low=0, high=256, size=self.input_shape - ).astype("uint8") - - if self.scale > 0: - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = nearest_neighbor_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - self.attrs = { - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_dygraph=False - ) - - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [1, 3, 9, 6] - self.out_h = 10 - self.out_w = 9 - self.scale = 0.0 - self.align_corners = True - - -class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [2, 3, 32, 64] - self.out_h = 80 - self.out_w = 40 - self.scale = 0.0 - self.align_corners = True - - -class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [4, 1, 7, 8] - self.out_h = 5 - self.out_w = 13 - self.scale = 0.0 - self.out_size = np.array([6, 15]).astype("int32") - self.align_corners = True - - -class 
TestNearestInterpWithoutCorners(TestNearestInterpOp): - def set_align_corners(self): - self.align_corners = False - - -class TestNearestNeighborInterpScale1(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 7, 5] - self.out_h = 64 - self.out_w = 32 - self.scale = 2.0 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpScale2(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 5, 7] - self.out_h = 64 - self.out_w = 32 - self.scale = 1.5 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -class TestNearestNeighborInterpScale3(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 7, 5] - self.out_h = 64 - self.out_w = 32 - self.scale = 1.0 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -class TestNearestInterpOp_attr_tensor(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.shape_by_1Dtensor = False - self.scale_by_1Dtensor = False - self.scale_by_2Dtensor = False - self.init_test_case() - self.op_type = "nearest_interp_v2" - self.attrs = { - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - } - - input_np = np.random.random(self.input_shape).astype("float64") - self.inputs = {'X': input_np} - - if self.scale_by_1Dtensor: - self.inputs['Scale'] = np.array([self.scale]).astype("float32") - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - elif self.scale_by_2Dtensor: - self.inputs['Scale'] = np.array(self.scale).astype("float32") - out_h = int(self.input_shape[2] * self.scale[0]) - out_w = int(self.input_shape[3] * self.scale[1]) - elif self.scale > 0: - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - self.attrs['scale'] = self.scale - else: - out_h = self.out_h - out_w = self.out_w - - if self.shape_by_1Dtensor: - self.inputs['OutSize'] = self.out_size - elif self.out_size is not None: - size_tensor = [] - for index, ele in enumerate(self.out_size): - size_tensor.append( - ("x" + str(index), np.ones(1).astype('int32') * ele) - ) - self.inputs['SizeTensor'] = size_tensor - - self.attrs['out_h'] = self.out_h - self.attrs['out_w'] = self.out_w - output_np = nearest_neighbor_interp_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - ) - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [2, 5, 4, 4] - self.out_h = 3 - self.out_w = 3 - self.scale = 0.0 - self.out_size = [3, 3] - self.align_corners = True - - -# out_size is a tensor list -class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.out_size = [8, 12] - self.align_corners = True - - -# out_size is a 1-D tensor -class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - 
self.scale = 0.0 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - self.shape_by_1Dtensor = True - - -# scale is a 1-D tensor -class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 2.0 - self.out_size = None - self.align_corners = True - self.scale_by_1Dtensor = True - - -# scale is a 2-D tensor -class TestNearestInterp_attr_tensor_Case4(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = [2.0, 2.0] - self.out_size = None - self.align_corners = True - self.scale_by_2Dtensor = True - - -if __name__ == "__main__": - import paddle - - paddle.enable_static() - unittest.main() diff --git a/test/legacy_test/test_trilinear_interp_op.py b/test/legacy_test/test_trilinear_interp_op.py deleted file mode 100755 index d83c1987e700c6..00000000000000 --- a/test/legacy_test/test_trilinear_interp_op.py +++ /dev/null @@ -1,613 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from op_test import OpTest - -from paddle.base import core - - -def trilinear_interp_np( - input, - out_d, - out_h, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - align_mode=0, - data_layout='NCDHW', -): - """trilinear interpolation implement in shape [N, C, D, H, W]""" - if data_layout == "NDHWC": - input = np.transpose(input, (0, 4, 1, 2, 3)) # NDHWC => NCDHW - if out_size is not None: - out_d = out_size[0] - out_h = out_size[1] - out_w = out_size[2] - if actual_shape is not None: - out_d = actual_shape[0] - out_h = actual_shape[1] - out_w = actual_shape[2] - batch_size, channel, in_d, in_h, in_w = input.shape - - ratio_d = ratio_h = ratio_w = 0.0 - if out_d > 1: - if align_corners: - ratio_d = (in_d - 1.0) / (out_d - 1.0) - else: - ratio_d = 1.0 * in_d / out_d - if out_h > 1: - if align_corners: - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 1.0 * in_h / out_h - if out_w > 1: - if align_corners: - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((batch_size, channel, out_d, out_h, out_w)) - - for i in range(out_d): - if align_mode == 0 and not align_corners: - d = int(ratio_d * (i + 0.5) - 0.5) - else: - d = int(ratio_d * i) - - d = max(0, d) - did = 1 if d < in_d - 1 else 0 - if align_mode == 0 and not align_corners: - idx_src_d = max(ratio_d * (i + 0.5) - 0.5, 0) - d1lambda = idx_src_d - d - else: - d1lambda = ratio_d * i - d - d2lambda = 1.0 - d1lambda - - for j in range(out_h): - if align_mode == 0 and not align_corners: - h = int(ratio_h * (j + 0.5) - 0.5) - else: - h = int(ratio_h * j) - - h = max(0, h) - hid = 1 if h < in_h - 1 else 0 - if align_mode == 0 and not align_corners: - idx_src_h = max(ratio_h * (j + 0.5) - 0.5, 0) - h1lambda = idx_src_h - h - else: - h1lambda = ratio_h * j - h - h2lambda = 1.0 - h1lambda - - for k in range(out_w): - if align_mode == 0 and not align_corners: - w = int(ratio_w * (k + 0.5) - 0.5) - else: - w = int(ratio_w * k) - w = max(0, w) - wid = 1 if w < in_w - 1 else 0 - if align_mode == 0 and not align_corners: - idx_src_w = max(ratio_w * (k + 0.5) - 0.5, 0) - w1lambda = idx_src_w - w - else: - w1lambda = ratio_w * k - w - w2lambda = 1.0 - w1lambda - - out[:, :, i, j, k] = d2lambda * ( - h2lambda - * ( - w2lambda * input[:, :, d, h, w] - + w1lambda * input[:, :, d, h, w + wid] - ) - + h1lambda - * ( - w2lambda * input[:, :, d, h + hid, w] - + w1lambda * input[:, :, d, h + hid, w + wid] - ) - ) + d1lambda * ( - h2lambda - * ( - w2lambda * input[:, :, d + did, h, w] - + w1lambda * input[:, :, d + did, h, w + wid] - ) - + h1lambda - * ( - w2lambda * input[:, :, d + did, h + hid, w] - + w1lambda * input[:, :, d + did, h + hid, w + wid] - ) - ) - if data_layout == "NDHWC": - out = np.transpose(out, (0, 2, 3, 4, 1)) # NCDHW => NDHWC - - return out.astype(input.dtype) - - -class TestTrilinearInterpOp(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCDHW' - self.init_test_case() - self.op_type = "trilinear_interp" - # NOTE(dev): some AsDispensible input is not used under imperative mode. 
- input_np = np.random.random(self.input_shape).astype("float32") - - if self.data_layout == "NCDHW": - in_d = self.input_shape[2] - in_h = self.input_shape[3] - in_w = self.input_shape[4] - else: - in_d = self.input_shape[1] - in_h = self.input_shape[2] - in_w = self.input_shape[3] - - if self.scale > 0: - out_d = int(in_d * self.scale) - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_d = self.out_d - out_h = self.out_h - out_w = self.out_w - - output_np = trilinear_interp_np( - input_np, - out_d, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - self.data_layout, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - # c++ end treat NCDHW the same way as NCHW - if self.data_layout == 'NCDHW': - data_layout = 'NCHW' - else: - data_layout = 'NHWC' - self.attrs = { - 'out_d': self.out_d, - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - 'data_layout': data_layout, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 4, 4, 4] - self.out_d = 2 - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3, 3]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase1(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 1, 7, 8, 9] - self.out_d = 1 - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase2(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 9, 6, 8] - self.out_d = 12 - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase3(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [3, 2, 16, 8, 4] - self.out_d = 32 - self.out_h = 16 - self.out_w = 8 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase4(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [4, 1, 7, 8, 9] - self.out_d = 1 - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.out_size = np.array([2, 2, 2]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase5(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [3, 3, 9, 6, 8] - self.out_d = 12 - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.out_size = np.array([11, 11, 11]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase6(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [1, 1, 16, 8, 4] - self.out_d = 8 - self.out_h = 32 - self.out_w = 16 - self.scale = 0.0 - self.out_size = np.array([17, 9, 
5]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpSame(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [1, 1, 16, 8, 4] - self.out_d = 16 - self.out_h = 8 - self.out_w = 4 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpSameHW(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [1, 1, 16, 8, 4] - self.out_d = 8 - self.out_h = 8 - self.out_w = 4 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpActualShape(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [3, 2, 16, 8, 4] - self.out_d = 64 - self.out_h = 32 - self.out_w = 16 - self.scale = 0.0 - self.out_size = np.array([33, 19, 7]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpDatalayout(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 4, 4, 4, 3] - self.out_d = 2 - self.out_h = 2 - self.out_w = 2 - self.scale = 0.0 - self.out_size = np.array([3, 3, 3]).astype("int32") - self.align_corners = True - self.align_mode = 1 - self.data_layout = "NDHWC" - - -class TestTrilinearInterpOpUint8(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "trilinear_interp" - input_np = np.random.randint( - low=0, high=256, size=self.input_shape - ).astype("uint8") - - if self.scale > 0: - out_d = int(self.input_shape[2] * self.scale) - out_h = int(self.input_shape[3] * self.scale) - out_w = int(self.input_shape[4] * self.scale) - else: - out_d = self.out_d - out_h = self.out_h - out_w = self.out_w - - output_np = trilinear_interp_np( - input_np, - out_d, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - ) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - - self.attrs = { - 'out_d': self.out_d, - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_dygraph=False - ) - - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [1, 3, 9, 6, 8] - self.out_d = 13 - self.out_h = 10 - self.out_w = 9 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase1Uint8(TestTrilinearInterpOpUint8): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 16, 8, 4] - self.out_d = 13 - self.out_h = 7 - self.out_w = 2 - self.scale = 0.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpCase2Uint8(TestTrilinearInterpOpUint8): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [4, 1, 7, 8, 9] - self.out_d = 3 - self.out_h = 5 - self.out_w = 13 - self.scale = 0.0 - self.out_size = np.array([6, 15, 21]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpOtherMethod1(TestTrilinearInterpOp): - def set_align_mode(self): - self.align_corners = False - self.align_mode = 1 - - -class 
TestTrilinearInterpWithMethod2(TestTrilinearInterpOp): - def set_align_mode(self): - self.align_corners = False - self.align_mode = 0 - - -class TestTrilinearInterpWithMethod3(TestTrilinearInterpOp): - def set_align_mode(self): - self.align_corners = True - self.align_mode = 0 - - -class TestTrilinearInterpScale1(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 5, 7, 9] - self.out_d = 82 - self.out_h = 60 - self.out_w = 25 - self.scale = 2.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpScale2(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 5, 7, 9] - self.out_d = 60 - self.out_h = 40 - self.out_w = 25 - self.scale = 1.0 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpScale3(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 5, 7, 9] - self.out_d = 60 - self.out_h = 40 - self.out_w = 25 - self.scale = 1.5 - self.align_corners = True - self.align_mode = 1 - - -class TestTrilinearInterpZero(TestTrilinearInterpOp): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 5, 7, 11] - self.out_d = 60 - self.out_h = 40 - self.out_w = 25 - self.scale = 0.2 - self.align_corners = False - self.align_mode = 0 - - -class TestTrilinearInterpOp_attr_tensor(OpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "trilinear_interp" - self.shape_by_1Dtensor = False - self.scale_by_1Dtensor = False - self.attrs = { - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - } - - input_np = np.random.random(self.input_shape).astype("float32") - self.inputs = {'X': input_np} - - if self.scale_by_1Dtensor: - self.inputs['Scale'] = np.array([self.scale]).astype("float32") - elif self.scale > 0: - out_d = int(self.input_shape[2] * self.scale) - out_h = int(self.input_shape[3] * self.scale) - out_w = int(self.input_shape[4] * self.scale) - self.attrs['scale'] = self.scale - else: - out_d = self.out_d - out_h = self.out_h - out_w = self.out_w - - if self.shape_by_1Dtensor: - self.inputs['OutSize'] = self.out_size - elif self.out_size is not None: - size_tensor = [] - for index, ele in enumerate(self.out_size): - size_tensor.append( - ("x" + str(index), np.ones(1).astype('int32') * ele) - ) - self.inputs['SizeTensor'] = size_tensor - - self.attrs['out_d'] = self.out_d - self.attrs['out_h'] = self.out_h - self.attrs['out_w'] = self.out_w - output_np = trilinear_interp_np( - input_np, - out_d, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.align_corners, - self.align_mode, - ) - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - def test_check_grad(self): - self.check_grad(['X'], 'Out', in_place=True, check_dygraph=False) - - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 4, 4, 4] - self.out_d = 2 - self.out_h = 3 - self.out_w = 3 - self.scale = 0.0 - self.out_size = [2, 3, 3] - self.align_corners = True - self.align_mode = 1 - - -# out_size is a 1-D tensor -class TestTrilinearInterp_attr_tensor_Case1(TestTrilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [3, 2, 9, 6, 8] - self.out_d = 32 - self.out_h = 16 - self.out_w = 8 - 
self.scale = 0.3 - self.out_size = [12, 4, 4] - self.align_corners = True - self.align_mode = 1 - - -# scale is a 1-D tensor -class TestTrilinearInterp_attr_tensor_Case2(TestTrilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 8, 8, 4] - self.out_d = 16 - self.out_h = 12 - self.out_w = 4 - self.scale = 0.0 - self.out_size = [16, 4, 10] - self.align_corners = True - self.align_mode = 1 - self.shape_by_1Dtensor = True - - -# scale is a 1-D tensor -class TestTrilinearInterp_attr_tensor_Case3(TestTrilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'trilinear' - self.input_shape = [2, 3, 8, 8, 4] - self.out_d = 16 - self.out_h = 16 - self.out_w = 8 - self.scale = 2.0 - self.out_size = None - self.align_corners = True - self.align_mode = 1 - self.scale_by_1Dtensor = True - - -if __name__ == "__main__": - unittest.main() diff --git a/test/mkldnn/test_bilinear_interp_mkldnn_op.py b/test/mkldnn/test_bilinear_interp_mkldnn_op.py deleted file mode 100644 index 023b07d9ef4679..00000000000000 --- a/test/mkldnn/test_bilinear_interp_mkldnn_op.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import math -import unittest - -import numpy as np -from op_test import OpTest, skip_check_grad_ci - - -def bilinear_interp_mkldnn_np( - input, out_h, out_w, out_size=None, actual_shape=None, data_layout='NCHW' -): - """bilinear interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - input = np.transpose(input, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - batch_size, channel, in_h, in_w = input.shape - - out = np.zeros((batch_size, channel, out_h, out_w)) - - for oh in range(out_h): - h0 = int(math.floor((oh + 0.5) * in_h / out_h - 0.5)) - h1 = int(math.ceil((oh + 0.5) * in_h / out_h - 0.5)) - h0 = max(h0, 0) - h1 = min(h1, in_h - 1) - Wh = (oh + 0.5) * in_h / out_h - 0.5 - h0 - for ow in range(out_w): - w0 = int(math.floor((ow + 0.5) * in_w / out_w - 0.5)) - w1 = int(math.ceil((ow + 0.5) * in_w / out_w - 0.5)) - w0 = max(w0, 0) - w1 = min(w1, in_w - 1) - Ww = (ow + 0.5) * in_w / out_w - 0.5 - w0 - input_h0_w0 = input[:, :, h0, w0] - input_h1_w0 = input[:, :, h1, w0] - input_h0_w1 = input[:, :, h0, w1] - input_h1_w1 = input[:, :, h1, w1] - out[:, :, oh, ow] = ( - input_h0_w0 * (1 - Wh) * (1 - Ww) - + input_h1_w0 * Wh * (1 - Ww) - + input_h0_w1 * (1 - Wh) * Ww - + input_h1_w1 * Wh * Ww - ) - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - - return out.astype(input.dtype) - - -@skip_check_grad_ci(reason="Haven not implement interpolate grad kernel.") -class TestBilinearInterpMKLDNNOp(OpTest): - def init_test_case(self): - pass - - def setUp(self): - self.op_type = "bilinear_interp" - self.interp_method = 'bilinear' - self._cpu_only = True - self.use_mkldnn = True - self.input_shape = [1, 1, 2, 2] - self.data_layout = 'NCHW' - # priority: actual_shape > out_size > scale > out_h & out_w - self.out_h = 1 - self.out_w = 1 - self.scale = 2.0 - self.out_size = None - self.actual_shape = None - - self.init_test_case() - - input_np = np.random.random(self.input_shape).astype("float32") - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = bilinear_interp_mkldnn_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.data_layout, - ) - - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - self.attrs = { - 'interp_method': self.interp_method, - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'data_layout': self.data_layout, - 'use_mkldnn': self.use_mkldnn, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - -class TestBilinearInterpOpMKLDNNNHWC(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [3, 2, 32, 16] - self.out_h = 27 - self.out_w = 49 - self.scale = 2.0 - self.data_layout = 'NHWC' - - -class TestBilinearNeighborInterpMKLDNNCase2(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 1.0 - - -class TestBilinearNeighborInterpDataLayout(TestBilinearInterpMKLDNNOp): - def 
init_test_case(self): - self.input_shape = [2, 4, 4, 5] - self.out_h = 6 - self.out_w = 7 - self.scale = 0.0 - self.data_layout = "NHWC" - - -class TestBilinearNeighborInterpCase3(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 128 - self.scale = 0.0 - - -class TestBilinearNeighborInterpCase4(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0.0 - self.out_size = np.array([2, 2]).astype("int32") - - -class TestBilinearNeighborInterpCase5(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [1, 1, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0.0 - self.out_size = np.array([13, 13]).astype("int32") - - -class TestBilinearNeighborInterpCase6(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([65, 129]).astype("int32") - - -class TestBilinearNeighborInterpSame(TestBilinearInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0.0 - - -if __name__ == "__main__": - from paddle import enable_static - - enable_static() - unittest.main() diff --git a/test/mkldnn/test_matmul_mkldnn_op.py b/test/mkldnn/test_matmul_mkldnn_op.py deleted file mode 100644 index 85a6d79de97592..00000000000000 --- a/test/mkldnn/test_matmul_mkldnn_op.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import unittest - -import numpy as np -from op_test import OpTest - - -class TestDnnlMatMulOp(OpTest): - def generate_data(self): - self.x = np.random.random((25, 2, 2)).astype("float32") - self.y = np.random.random((25, 2, 2)).astype("float32") - self.alpha = 1.0 - self.out = self.alpha * np.matmul(self.x, self.y) - - def set_attributes(self): - self.alpha = self.alpha if hasattr(self, 'alpha') else 1.0 - self.attrs = {'alpha': self.alpha} - - def setUp(self): - # Set max isa, otherwise fails on SKX and earlier - os.environ["DNNL_MAX_CPU_ISA"] = "AVX" - self.op_type = "matmul" - self._cpu_only = True - self.use_mkldnn = True - self.generate_data() - self.set_attributes() - self.attrs['use_mkldnn'] = True - - self.inputs = {'X': self.x, 'Y': self.y} - self.outputs = {'Out': self.out} - - def test_check_output(self): - self.check_output() - - -class TestDnnlMatMulWithGradOp(TestDnnlMatMulOp): - def test_check_grad(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2) - - -class TestDnnlMatMulOpMixedDims1(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((17, 2, 3)).astype("float32") - self.y = np.random.random((3, 4)).astype("float32") - self.out = np.matmul(self.x, self.y) - - -class TestDnnlMatMulOpMixedDimsYWiderTransposeY(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((8, 2, 3)).astype("float32") - self.y = np.random.random((4, 3)).astype("float32") - self.out = np.matmul(self.x, np.transpose(self.y)) - - def set_attributes(self): - self.attrs = {'transpose_Y': True} - - -class TestDnnlMatMulOpMixedDimsYWiderTransposeX(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((8, 3, 2)).astype("float32") - self.y = np.random.random((3, 4)).astype("float32") - self.out = np.matmul(np.transpose(self.x, (0, 2, 1)), self.y) - - def set_attributes(self): - self.attrs = {'transpose_X': True} - - -class TestDnnlMatMulOpMixedDimsXWiderTransposeXY(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((8, 3, 2)).astype("float32") - self.y = np.random.random((4, 3)).astype("float32") - self.out = np.matmul( - np.transpose(self.x, (0, 2, 1)), np.transpose(self.y) - ) - - def set_attributes(self): - self.attrs = {'transpose_X': True, 'transpose_Y': True} - - -class TestDnnlMatMulOpMixedDimsYWiderTransposeXY(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((3, 2)).astype("float32") - self.y = np.random.random((8, 4, 3)).astype("float32") - self.out = np.matmul( - np.transpose(self.x), np.transpose(self.y, (0, 2, 1)) - ) - - def set_attributes(self): - self.attrs = {'transpose_X': True, 'transpose_Y': True} - - -class TestDnnlMatMulOpMixedDimsXWiderTransposeX(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((5, 4)).astype("float32") - self.y = np.random.random((8, 5, 4)).astype("float32") - self.out = np.matmul(np.transpose(self.x), self.y) - - def set_attributes(self): - self.attrs = {'transpose_X': True} - - -class TestDnnlMatMulOpVectorMultiply(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random(5).astype("float32") - self.y = np.random.random(5).astype("float32") - self.out = np.matmul(self.x, self.y) - - -class TestDnnlMatMulOpVectorMultiplyTranspose(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random(5).astype("float32") - x_resized = np.copy(self.x) - x_resized = np.expand_dims(x_resized, 1) - self.y = 
np.random.random(6).astype("float32") - y_resized = np.copy(self.y) - y_resized = np.expand_dims(y_resized, 0) - self.out = np.matmul(x_resized, y_resized) - - def set_attributes(self): - self.attrs = {'transpose_Y': True, 'transpose_X': True} - - -class TestDnnlMatMulOpMixedDims2(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((2, 3)).astype("float32") - self.y = np.random.random((17, 3, 4)).astype("float32") - self.out = np.matmul(self.x, self.y) - - -class TestDnnlMatMulOpAlpha(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((17, 2, 3)).astype("float32") - self.y = np.random.random((17, 3, 2)).astype("float32") - self.alpha = 2.0 - self.out = self.alpha * np.matmul(self.x, self.y) - - -class TestDnnlMatMulOp2D(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((12, 9)).astype("float32") - self.y = np.random.random((9, 12)).astype("float32") - self.out = np.matmul(self.x, self.y) - - -class TestDnnlMatMulOpTransposeX(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((12, 9)).astype("float32") - self.y = np.random.random((12, 9)).astype("float32") - self.out = np.matmul(np.transpose(self.x), self.y) - - def set_attributes(self): - self.attrs = {'transpose_X': True} - - -class TestDnnlMatMulOpTransposeY(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((12, 9)).astype("float32") - self.y = np.random.random((12, 9)).astype("float32") - self.out = np.matmul(self.x, np.transpose(self.y)) - - def set_attributes(self): - self.attrs = {'transpose_Y': True} - - -class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulWithGradOp): - def generate_data(self): - self.x = np.random.random((17, 3, 2)).astype("float32") - self.y = np.random.random((17, 3, 2)).astype("float32") - self.out = np.matmul(self.x, np.transpose(self.y, (0, 2, 1))) - - def set_attributes(self): - self.attrs = {'transpose_Y': True} - - -class TestDnnlMatMulOpInt8NoScales(TestDnnlMatMulOp): - def generate_data(self): - self.x = np.random.random((12, 9)).astype("int8") - self.y = np.random.random((9, 12)).astype("int8") - self.out = np.matmul(self.x, self.y) - - -class TestDnnlMatMulOpInt8(TestDnnlMatMulOp): - # Due to limitation in int8 matmul implementation - # on older platforms (BDW, SKX) we needed to reduce - # range from [-127, 127] to [-63, 63] - def quantize(self, tensor): - scale = 63.0 / np.abs(np.amax(tensor)) - quantized = np.round(scale * tensor).astype("int8") - return scale, quantized - - def generate_data(self): - x_float = np.random.random((12, 9)).astype("float32") - self.x_scale, self.x = self.quantize(x_float) - - y_float = np.random.random((9, 12)).astype("float32") - self.y_scale, self.y = self.quantize(y_float) - - out_float = np.matmul(x_float, y_float) - self.out_scale, self.out = self.quantize(out_float) - - def set_attributes(self): - self.attrs = { - 'Scale_x': self.x_scale, - 'Scale_y': self.y_scale, - 'Scale_out': self.out_scale, - } - - def test_check_output(self): - int_atol = 1 - self.check_output(atol=int_atol) - - -class TestDnnlMatMulOpInt8ForceFP32(TestDnnlMatMulOpInt8): - def generate_data(self): - x_float = np.random.random((12, 9)).astype("float32") - self.x_scale, self.x = self.quantize(x_float) - - y_float = np.random.random((9, 12)).astype("float32") - self.y_scale, self.y = self.quantize(y_float) - - out_float = np.matmul(x_float, y_float) - self.out = out_float - - def set_attributes(self): - self.attrs = { - 'Scale_x': self.x_scale, 
- 'Scale_y': self.y_scale, - 'force_fp32_output': True, - } - - -class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp): - def generate_data(self): - self.x = np.random.randint(0, 3, (12, 9)).astype("int8") - self.y = np.random.randint(0, 3, (9, 12)).astype("int8") - self.out = np.matmul(self.x, self.y).astype("float32") - - def set_attributes(self): - self.attrs = {'force_fp32_output': True} - - -if __name__ == "__main__": - from paddle import enable_static - - enable_static() - unittest.main() diff --git a/test/mkldnn/test_nearest_interp_mkldnn_op.py b/test/mkldnn/test_nearest_interp_mkldnn_op.py deleted file mode 100644 index 1e07a605688247..00000000000000 --- a/test/mkldnn/test_nearest_interp_mkldnn_op.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest, skip_check_grad_ci - - -def nearest_neighbor_interp_mkldnn_np( - X, out_h, out_w, out_size=None, actual_shape=None, data_layout='NCHW' -): - """nearest neighbor interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - X = np.transpose(X, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - - n, c, in_h, in_w = X.shape - - fh = fw = 0.0 - if out_h > 1: - fh = out_h * 1.0 / in_h - if out_w > 1: - fw = out_w * 1.0 / in_w - - out = np.zeros((n, c, out_h, out_w)) - - for oh in range(out_h): - ih = int(round((oh + 0.5) / fh - 0.5)) - for ow in range(out_w): - iw = int(round((ow + 0.5) / fw - 0.5)) - out[:, :, oh, ow] = X[:, :, ih, iw] - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - - return out.astype(X.dtype) - - -@skip_check_grad_ci(reason="Haven not implement interpolate grad kernel.") -class TestNearestInterpMKLDNNOp(OpTest): - def init_test_case(self): - pass - - def init_data_type(self): - self.dtype = np.float32 - - def setUp(self): - self.op_type = "nearest_interp" - self.interp_method = 'nearest' - self._cpu_only = True - self.use_mkldnn = True - self.input_shape = [1, 1, 2, 2] - self.data_layout = 'NCHW' - # priority: actual_shape > out_size > scale > out_h & out_w - self.out_h = 1 - self.out_w = 1 - self.scale = 2.0 - self.out_size = None - self.actual_shape = None - - self.init_test_case() - self.init_data_type() - - if self.dtype == np.float32: - input_np = np.random.random(self.input_shape).astype(self.dtype) - else: - init_low, init_high = (-5, 5) if self.dtype == np.int8 else (0, 10) - input_np = np.random.randint( - init_low, init_high, self.input_shape - ).astype(self.dtype) - - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w 
- - output_np = nearest_neighbor_interp_mkldnn_np( - input_np, - out_h, - out_w, - self.out_size, - self.actual_shape, - self.data_layout, - ) - - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - self.attrs = { - 'interp_method': self.interp_method, - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'data_layout': self.data_layout, - 'use_mkldnn': self.use_mkldnn, - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - -class TestNearestInterpOpMKLDNNNHWC(TestNearestInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [3, 2, 32, 16] - self.out_h = 27 - self.out_w = 49 - self.scale = 2.0 - self.data_layout = 'NHWC' - - -class TestNearestNeighborInterpMKLDNNCase2(TestNearestInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 1.0 - - -class TestNearestNeighborInterpCase3(TestNearestInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 128 - self.scale = 0.0 - - -class TestNearestNeighborInterpCase4(TestNearestInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0.0 - self.out_size = np.array([65, 129]).astype("int32") - - -class TestNearestNeighborInterpSame(TestNearestInterpMKLDNNOp): - def init_test_case(self): - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0.0 - - -def create_test_class(parent): - ''' - Create tests for int, uint8. By default parent class works on fp32. - ''' - - class TestInt8Case(parent): - def init_data_type(self): - self.dtype = np.int8 - - class TestUint8Case(parent): - def init_data_type(self): - self.dtype = np.uint8 - - TestInt8Case.__name__ = "{}_{}".format(parent.__name__, "INT8") - TestUint8Case.__name__ = "{}_{}".format(parent.__name__, "UINT8") - globals()[TestInt8Case.__name__] = TestInt8Case - globals()[TestUint8Case.__name__] = TestUint8Case - - -create_test_class(TestNearestInterpMKLDNNOp) -create_test_class(TestNearestInterpOpMKLDNNNHWC) -create_test_class(TestNearestNeighborInterpMKLDNNCase2) -create_test_class(TestNearestNeighborInterpCase3) -create_test_class(TestNearestNeighborInterpCase4) -create_test_class(TestNearestNeighborInterpSame) - -if __name__ == "__main__": - from paddle import enable_static - - enable_static() - unittest.main() diff --git a/test/xpu/CMakeLists.txt b/test/xpu/CMakeLists.txt index 4b269e60cfa12d..ad2de316465cf5 100644 --- a/test/xpu/CMakeLists.txt +++ b/test/xpu/CMakeLists.txt @@ -32,7 +32,6 @@ endforeach() set_tests_properties(test_conv2d_op_xpu PROPERTIES TIMEOUT 120) set_tests_properties(test_mul_op_xpu PROPERTIES TIMEOUT 120) set_tests_properties(test_matmul_v2_op_xpu PROPERTIES TIMEOUT 900) -set_tests_properties(test_matmul_op_xpu PROPERTIES TIMEOUT 300) set_tests_properties(test_collective_identity_xpu PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") set_tests_properties(test_collective_allgather_xpu diff --git a/test/xpu/test_bilinear_interp_op_xpu.py b/test/xpu/test_bilinear_interp_op_xpu.py deleted file mode 100755 index 6c08731d3b01d9..00000000000000 --- a/test/xpu/test_bilinear_interp_op_xpu.py +++ /dev/null @@ -1,508 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import paddle - -paddle.enable_static() -''' -def bilinear_interp_np(input, - out_h, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - align_mode=0, - data_layout='NCHW'): - """bilinear interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - input = np.transpose(input, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - batch_size, channel, in_h, in_w = input.shape - - ratio_h = ratio_w = 0.0 - if out_h > 1: - if (align_corners): - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 1.0 * in_h / out_h - if out_w > 1: - if (align_corners): - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((batch_size, channel, out_h, out_w)) - - for i in range(out_h): - if (align_mode == 0 and not align_corners): - h = int(ratio_h * (i + 0.5) - 0.5) - else: - h = int(ratio_h * i) - - h = max(0, h) - hid = 1 if h < in_h - 1 else 0 - if (align_mode == 0 and not align_corners): - idx_src_h = max(ratio_h * (i + 0.5) - 0.5, 0) - h1lambda = idx_src_h - h - else: - h1lambda = ratio_h * i - h - h2lambda = 1.0 - h1lambda - for j in range(out_w): - if (align_mode == 0 and not align_corners): - w = int(ratio_w * (j + 0.5) - 0.5) - else: - w = int(ratio_w * j) - w = max(0, w) - wid = 1 if w < in_w - 1 else 0 - if (align_mode == 0 and not align_corners): - idx_src_w = max(ratio_w * (j + 0.5) - 0.5, 0) - w1lambda = idx_src_w - w - else: - w1lambda = ratio_w * j - w - w2lambda = 1.0 - w1lambda - - out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] + - w1lambda*input[:, :, h, w+wid]) + \ - h1lambda*(w2lambda*input[:, :, h+hid, w] + - w1lambda*input[:, :, h+hid, w+wid]) - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - - return out.astype(input.dtype) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpOp(XPUOpTest): - def setUp(self): - self.use_xpu = True - self.out_size = None - self.actual_shape = None - self.data_layout = 'NCHW' - self.init_test_case() - self.op_type = "bilinear_interp" - input_np = np.random.random(self.input_shape).astype("float32") - - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size, - self.actual_shape, self.align_corners, - self.align_mode, self.data_layout) - self.inputs = {'X': input_np} - if self.out_size is not None: - self.inputs['OutSize'] = self.out_size - if self.actual_shape is not None: - self.inputs['OutSize'] = self.actual_shape - - self.attrs = { - 'out_h': self.out_h, - 
'out_w': self.out_w, - 'scale': self.scale, - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - 'align_mode': self.align_mode, - 'data_layout': self.data_layout - } - self.outputs = {'Out': output_np} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place) - - def test_check_grad(self): - place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ['X'], 'Out', in_place=True) - - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0. - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpCase1(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0. - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpCase2(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0. - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpCase3(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpCase4(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0. - self.out_size = np.array([2, 2]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpCase5(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0. - self.out_size = np.array([11, 11]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpCase6(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.out_size = np.array([65, 33]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpSame(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0. 
- self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpActualShape(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpDataLayout(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 5, 5, 3] - self.out_h = 2 - self.out_w = 2 - self.scale = 0. - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - self.align_mode = 1 - self.data_layout = "NHWC" - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): - def set_align_mode(self): - self.align_corners = False - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpWithMethod2(TestBilinearInterpOp): - def set_align_mode(self): - self.align_corners = False - self.align_mode = 0 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpWithMethod3(TestBilinearInterpOp): - def set_align_mode(self): - self.align_corners = True - self.align_mode = 0 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpScale1(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 2. - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpScale2(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 1. 
- self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpScale3(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 1.5 - self.align_corners = True - self.align_mode = 1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpZero(TestBilinearInterpOp): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 7] - self.out_h = 60 - self.out_w = 25 - self.scale = 0.2 - self.align_corners = False - self.align_mode = 0 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpOp_attr_tensor(XPUOpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.init_test_case() - self.op_type = "bilinear_interp" - self.shape_by_1Dtensor = False - self.scale_by_1Dtensor = False - self.attrs = { - 'interp_method': self.interp_method, - 'align_corners': self.align_corners, - } - - input_np = np.random.random(self.input_shape).astype("float32") - self.inputs = {'X': input_np} - - if self.scale_by_1Dtensor: - self.inputs['Scale'] = np.array([self.scale]).astype("float32") - elif self.scale > 0: - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - self.attrs['scale'] = self.scale - else: - out_h = self.out_h - out_w = self.out_w - - if self.shape_by_1Dtensor: - self.inputs['OutSize'] = self.out_size - elif self.out_size is not None: - size_tensor = [] - for index, ele in enumerate(self.out_size): - size_tensor.append(("x" + str(index), np.ones( - (1)).astype('int32') * ele)) - self.inputs['SizeTensor'] = size_tensor - - self.attrs['out_h'] = self.out_h - self.attrs['out_w'] = self.out_w - output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size, - self.actual_shape, self.align_corners) - self.outputs = {'Out': output_np} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place) - - def test_check_grad(self): - place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ['X'], 'Out', in_place=True) - - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [2, 3, 5, 5] - self.out_h = 3 - self.out_w = 3 - self.scale = 0. - self.out_size = [3, 3] - self.align_corners = True - - -# out_size is a 1-D tensor -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0. - self.out_size = [8, 12] - self.align_corners = True - - -# scale is a 1-D tensor -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. 
- self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - self.shape_by_1Dtensor = True - - -# scale is a 1-D tensor -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'bilinear' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 2.0 - self.out_size = None - self.align_corners = True - self.scale_by_1Dtensor = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestBilinearInterpOpAPI(unittest.TestCase): - def test_case(self): - x = paddle.static.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - - dim = paddle.static.data(name="dim", shape=[1], dtype="int32") - shape_tensor = paddle.static.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = paddle.static.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = paddle.static.data( - name="scale_tensor", shape=[1], dtype="float32") - - out1 = base.layers.resize_bilinear(x, out_shape=[12, 12]) - out2 = base.layers.resize_bilinear(x, out_shape=[12, dim]) - out3 = base.layers.resize_bilinear(x, out_shape=shape_tensor) - out4 = base.layers.resize_bilinear( - x, out_shape=[4, 4], actual_shape=actual_size) - out5 = base.layers.resize_bilinear(x, scale=scale_tensor) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - place = core.XPUPlace(0) - exe = base.Executor(place) - exe.run(base.default_startup_program()) - results = exe.run(base.default_main_program(), - feed={ - "x": x_data, - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True) - - expect_res = bilinear_interp_np( - x_data, out_h=12, out_w=12, align_corners=True) - for res in results: - np.testing.assert_allclose(res, expect_res) -''' - -if __name__ == "__main__": - unittest.main() diff --git a/test/xpu/test_matmul_op_xpu.py b/test/xpu/test_matmul_op_xpu.py deleted file mode 100644 index bc944b2608c045..00000000000000 --- a/test/xpu/test_matmul_op_xpu.py +++ /dev/null @@ -1,387 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from get_test_cover_info import ( - XPUOpTestWrapper, - create_test_class, - get_xpu_op_support_types, -) -from op_test_xpu import XPUOpTest - -import paddle -from paddle import base - - -def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): - """Reference forward implementation using np.matmul.""" - # np.matmul does not support the transpose flags, so we manually - # transpose X and Y appropriately. 
- if transpose_X: - if X.ndim == 1: - X = X.reshape((X.size, 1)) - elif X.ndim == 2: - X = X.T - else: - dim = list(range(len(X.shape))) - dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1] - X = np.transpose(X, tuple(dim)) - if transpose_Y: - if Y.ndim == 1: - Y = Y.reshape((1, Y.size)) - elif Y.ndim == 2: - Y = Y.T - else: - dim = list(range(len(Y.shape))) - dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1] - Y = np.transpose(Y, tuple(dim)) - - if X.ndim == 3 and Y.ndim == 2: - x_dims = X.shape - X = X.reshape((x_dims[0] * x_dims[1], x_dims[2])) - if Y.ndim == 3 and X.ndim == 2: - y_dims = Y.shape - Y = Y.reshape((y_dims[0] * y_dims[1], y_dims[2])) - Out = np.matmul(X, Y) - return Out - - -def generate_compatible_shapes( - dim_X, dim_Y, transpose_X, transpose_Y, batch_size -): - BATCH_SIZE = 2 - if batch_size is not None: - BATCH_SIZE = batch_size - - M = 3 - N = 4 - K = 5 - if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y): - K = 1 - if dim_X == 1: - if transpose_X: - shape_X = [M] - else: - shape_X = [K] - if dim_Y == 1: - if transpose_Y: - shape_Y = [N] - else: - shape_Y = [K] - if dim_X >= 2: - if transpose_X: - shape_X = [K, M] - else: - shape_X = [M, K] - if dim_X == 3: - shape_X = [BATCH_SIZE] + shape_X - if dim_Y >= 2: - if transpose_Y: - shape_Y = [N, K] - else: - shape_Y = [K, N] - if dim_Y == 3: - shape_Y = [BATCH_SIZE] + shape_Y - - if dim_Y == 3 and dim_X == 2: - if not transpose_X: - shape_X[1] = shape_X[1] * BATCH_SIZE - else: - shape_X[0] = shape_X[0] * BATCH_SIZE - - return shape_X, shape_Y - - -def generate_compatible_shapes_2(dim, transpose_X, transpose_Y): - M = 2 - N = 4 - K = 3 - shape_X = [2 for _ in range(dim - 2)] - shape_Y = [2 for _ in range(dim - 2)] - - if transpose_X: - shape_X += [K, M] - else: - shape_X += [M, K] - - if transpose_Y: - shape_Y += [N, K] - else: - shape_Y += [K, N] - - return shape_X, shape_Y - - -class XPUTestMatmulOpErr(XPUOpTestWrapper): - def __init__(self): - self.op_name = "matmul" - self.use_dynamic_create_class = False - - class API_TestMm(unittest.TestCase): - def test_out(self): - with base.program_guard(base.Program()): - x = paddle.static.data(name="x", shape=[2], dtype=self.in_type) - y = paddle.static.data(name='y', shape=[2], dtype=self.in_type) - result = paddle.mm(x, y) - exe = base.Executor(base.XPUPlace(0)) - data1 = np.random.rand(2).astype(self.in_type) - data2 = np.random.rand(2).astype(self.in_type) - np_res = exe.run( - feed={'x': data1, 'y': data2}, fetch_list=[result] - ) - expected_result = np.matmul(data1, data2) - - np.testing.assert_allclose(np_res, expected_result, atol=1e-3) - - def test_dygraph_without_out(self): - device = base.XPUPlace(0) - with base.dygraph.guard(device): - input_array1 = np.random.rand(3, 4).astype(self.in_type) - input_array2 = np.random.rand(4, 3).astype(self.in_type) - data1 = paddle.to_tensor(input_array1) - data2 = paddle.to_tensor(input_array2) - out = paddle.mm(data1, data2) - expected_result = np.matmul(input_array1, input_array2) - np.testing.assert_allclose( - expected_result, out.numpy(), atol=1e-3 - ) - - class Test_API_Matmul(unittest.TestCase): - def test_dygraph_without_out(self): - device = base.XPUPlace(0) - with base.dygraph.guard(device): - input_array1 = np.random.rand(3, 4).astype(self.in_type) - input_array2 = np.random.rand(4, 3).astype(self.in_type) - data1 = paddle.to_tensor(input_array1).astype(self.in_type) - data2 = paddle.to_tensor(input_array2).astype(self.in_type) - out = paddle.matmul(data1, data2) - expected_result 
= np.matmul(input_array1, input_array2) - np.testing.assert_allclose( - expected_result, out.numpy(), atol=1e-3 - ) - - class API_TestMmError(unittest.TestCase): - def test_errors(self): - def test_error1(): - with base.program_guard(base.Program(), base.Program()): - data1 = paddle.static.data( - name="data1", shape=[10, 2], dtype="float32" - ) - data2 = paddle.static.data( - name="data2", shape=[3, 10], dtype="float32" - ) - paddle.mm(data1, data2) - - self.assertRaises(ValueError, test_error1) - - def test_error2(): - with base.program_guard(base.Program(), base.Program()): - data1 = paddle.static.data( - name="data1", shape=[-1, 10, 2], dtype="float32" - ) - data2 = paddle.static.data( - name="data2", shape=[-1, 2, 10], dtype="float32" - ) - paddle.mm(data1, data2) - - test_error2() - - def test_error3(): - with base.program_guard(base.Program(), base.Program()): - data1 = paddle.static.data( - name="data1", shape=[10, 10, 2], dtype="float32" - ) - data2 = paddle.static.data( - name="data2", shape=[3, 2, 10], dtype="float32" - ) - paddle.mm(data1, data2) - - self.assertRaises(ValueError, test_error3) - - -class TestMatmulBaseGenerator(XPUOpTest): - def setUp(self): - self.op_type = "matmul" - self.dtype = ( - np.float32 if not hasattr(self, 'in_type') else self.in_type - ) - - self.__class__.no_need_check_grad = ( - False - if not hasattr(self, 'no_need_check_grad') - else self.no_need_check_grad - ) - - shape_X = [4, 5] if not hasattr(self, 'shape_X') else self.shape_X - shape_Y = [5, 6] if not hasattr(self, 'shape_Y') else self.shape_Y - transpose_X = ( - False if not hasattr(self, 'transpose_X') else self.transpose_X - ) - transpose_Y = ( - False if not hasattr(self, 'transpose_Y') else self.transpose_Y - ) - - X = np.random.random(shape_X).astype(self.dtype) - Y = np.random.random(shape_Y).astype(self.dtype) - Out = reference_matmul(X, Y, transpose_X, transpose_Y).astype( - self.dtype - ) - self.inputs = {'X': X, 'Y': Y} - self.attrs = {'transpose_X': transpose_X, 'transpose_Y': transpose_Y} - self.outputs = {'Out': Out} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place, atol=1e-3) - - def test_check_grad_normal(self): - if ( - hasattr(self.__class__, "no_need_check_grad") - and self.__class__.no_need_check_grad - ): - return - - place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=5e-2 - ) - - def test_check_grad_ignore_x(self): - if ( - hasattr(self.__class__, "no_need_check_grad") - and self.__class__.no_need_check_grad - ): - return - - place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], 'Out', max_relative_error=5e-2, no_grad_set=set("X") - ) - - def test_check_grad_ignore_y(self): - if ( - hasattr(self.__class__, "no_need_check_grad") - and self.__class__.no_need_check_grad - ): - return - - place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=5e-2, no_grad_set=set('Y') - ) - - -class XPUTestMatmulOp1(XPUOpTestWrapper): - def __init__(self): - self.op_name = "matmul" - self.use_dynamic_create_class = True - - def dynamic_create_class(self): - base_class = TestMatmulBaseGenerator - classes = [] - xpu_support_dims_list = [[1, 1], [2, 2], [3, 3]] - batch_size = [2, 4, 5, 10, 50, 100, 300] - for dims in xpu_support_dims_list: - dim_X = dims[0] - dim_Y = dims[1] - for transpose_x in [True, False]: - for transpose_y in [True, False]: - for batch in batch_size: - no_need_check_grad = False - if batch >= 5: - 
no_need_check_grad = True - class_name = f'TestMatMulOp_dimX_{dim_X}_dim_Y_{dim_Y}_transX_{transpose_x}_transY_{transpose_y}_batch_{batch}' - shape_x, shape_y = generate_compatible_shapes( - dim_X, dim_Y, transpose_x, transpose_y, batch - ) - attr_dict = { - 'shape_X': shape_x, - 'shape_Y': shape_y, - 'transpose_X': transpose_x, - 'transpose_Y': transpose_y, - 'no_need_check_grad': no_need_check_grad, - 'op_type': "matmul", - } - classes.append([class_name, attr_dict]) - - return base_class, classes - - -class XPUTestMatmulOp3(XPUOpTestWrapper): - def __init__(self): - self.op_name = "matmul" - self.use_dynamic_create_class = True - - def dynamic_create_class(self): - base_class = TestMatmulBaseGenerator - classes = [] - for dim in [4]: - for transpose_X in [False, True]: - for transpose_Y in [False, True]: - class_name = f'TestMatMulOp2_dimX_{dim}_dim_Y_{dim}_transX_{transpose_X}_transY_{transpose_Y}' - shape_X, shape_Y = generate_compatible_shapes_2( - dim, transpose_X, transpose_Y - ) - attr_dict = { - 'shape_X': shape_X, - 'shape_Y': shape_Y, - 'transpose_X': transpose_X, - 'transpose_Y': transpose_Y, - 'op_type': "matmul", - } - classes.append([class_name, attr_dict]) - return base_class, classes - - -class XPUTestMatmulOpBF16(XPUOpTestWrapper): - def __init__(self): - self.op_name = "matmul" - self.use_dynamic_create_class = True - - def dynamic_create_class(self): - base_class = TestMatmulBaseGenerator - classes = [] - for dim in [2]: - for transpose_X in [False, True]: - for transpose_Y in [False, True]: - class_name = f'TestMatMulOp2_dimX_{dim}_dim_Y_{dim}_transX_{transpose_X}_transY_{transpose_Y}' - shape_X, shape_Y = generate_compatible_shapes_2( - dim, transpose_X, transpose_Y - ) - attr_dict = { - 'shape_X': shape_X, - 'shape_Y': shape_Y, - 'transpose_X': transpose_X, - 'transpose_Y': transpose_Y, - 'op_type': "matmul", - } - classes.append([class_name, attr_dict]) - return base_class, classes - - -support_types = get_xpu_op_support_types('matmul') -for stype in support_types: - if "bfloat16" in str(stype): - # only support fc_fusion now - create_test_class(globals(), XPUTestMatmulOpBF16, stype) - else: - create_test_class(globals(), XPUTestMatmulOpErr, stype) - create_test_class(globals(), XPUTestMatmulOp1, stype) - create_test_class(globals(), XPUTestMatmulOp3, stype) - -if __name__ == "__main__": - paddle.enable_static() - unittest.main() diff --git a/test/xpu/test_nearest_interp_op_xpu.py b/test/xpu/test_nearest_interp_op_xpu.py deleted file mode 100644 index 1165521339da14..00000000000000 --- a/test/xpu/test_nearest_interp_op_xpu.py +++ /dev/null @@ -1,441 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import paddle - -paddle.enable_static() -''' -def nearest_neighbor_interp_np(X, - out_h, - out_w, - out_size=None, - actual_shape=None, - align_corners=True, - data_layout="NCHW"): - """nearest neighbor interpolation implement in shape [N, C, H, W]""" - if data_layout == "NHWC": - X = np.transpose(X, (0, 3, 1, 2)) # NHWC => NCHW - if out_size is not None: - out_h = out_size[0] - out_w = out_size[1] - if actual_shape is not None: - out_h = actual_shape[0] - out_w = actual_shape[1] - n, c, in_h, in_w = X.shape - - ratio_h = ratio_w = 0.0 - if (out_h > 1): - if (align_corners): - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 1.0 * in_h / out_h - if (out_w > 1): - if (align_corners): - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 1.0 * in_w / out_w - - out = np.zeros((n, c, out_h, out_w)) - - if align_corners: - for i in range(out_h): - in_i = int(ratio_h * i + 0.5) - for j in range(out_w): - in_j = int(ratio_w * j + 0.5) - out[:, :, i, j] = X[:, :, in_i, in_j] - else: - for i in range(out_h): - in_i = int(ratio_h * i) - for j in range(out_w): - in_j = int(ratio_w * j) - out[:, :, i, j] = X[:, :, in_i, in_j] - - if data_layout == "NHWC": - out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC - - return out.astype(X.dtype) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterpOp(XPUOpTest): - def setUp(self): - self.use_xpu = True - self.out_size = None - self.actual_shape = None - self.data_layout = "NCHW" - self.init_test_case() - self.op_type = "nearest_interp" - input_np = np.random.random(self.input_shape).astype("float32") - - if self.data_layout == "NCHW": - in_h = self.input_shape[2] - in_w = self.input_shape[3] - else: - in_h = self.input_shape[1] - in_w = self.input_shape[2] - - if self.scale > 0: - out_h = int(in_h * self.scale) - out_w = int(in_w * self.scale) - else: - out_h = self.out_h - out_w = self.out_w - - output_np = nearest_neighbor_interp_np( - input_np, out_h, out_w, self.out_size, self.actual_shape, - self.align_corners, self.data_layout) - self.inputs = {"X": input_np} - if self.out_size is not None: - self.inputs["OutSize"] = self.out_size - if self.actual_shape is not None: - self.inputs["OutSize"] = self.actual_shape - self.attrs = { - "out_h": self.out_h, - "out_w": self.out_w, - "scale": self.scale, - "interp_method": self.interp_method, - "align_corners": self.align_corners, - "data_layout": self.data_layout - } - self.outputs = {"Out": output_np} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place) - - def test_check_grad(self): - place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ["X"], "Out", in_place=True) - - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [2, 3, 4, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0. - self.out_size = np.array([3, 3]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpCase1(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0. 
- self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpCase2(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0. - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpCase3(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpCase4(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [4, 1, 7, 8] - self.out_h = 1 - self.out_w = 1 - self.scale = 0. - self.out_size = np.array([2, 2]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpCase5(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0. - self.out_size = np.array([11, 11]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpCase6(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [1, 1, 32, 64] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.out_size = np.array([65, 129]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpSame(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [2, 3, 32, 64] - self.out_h = 32 - self.out_w = 64 - self.scale = 0. - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpActualShape(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [2, 4, 4, 5] - self.out_h = 2 - self.out_w = 2 - self.scale = 0. - self.out_size = np.array([3, 8]).astype("int32") - self.align_corners = True - self.data_layout = "NCHW" - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterpWithoutCorners(TestNearestInterpOp): - def set_align_corners(self): - self.align_corners = False - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpScale1(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 2, 7, 5] - self.out_h = 64 - self.out_w = 32 - self.scale = 2. 
- self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpScale2(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 2, 5, 7] - self.out_h = 64 - self.out_w = 32 - self.scale = 1.5 - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestNeighborInterpScale3(TestNearestInterpOp): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 2, 7, 5] - self.out_h = 64 - self.out_w = 32 - self.scale = 1. - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterpOp_attr_tensor(XPUOpTest): - def setUp(self): - self.out_size = None - self.actual_shape = None - self.shape_by_1Dtensor = False - self.scale_by_1Dtensor = False - self.scale_by_2Dtensor = False - self.init_test_case() - self.op_type = "nearest_interp" - self.attrs = { - "interp_method": self.interp_method, - "align_corners": self.align_corners, - } - - input_np = np.random.random(self.input_shape).astype("float32") - self.inputs = {"X": input_np} - - if self.scale_by_1Dtensor: - self.inputs["Scale"] = np.array([self.scale]).astype("float32") - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) - elif self.scale_by_2Dtensor: - self.inputs['Scale'] = np.array(self.scale).astype("float32") - out_h = int(self.input_shape[2] * self.scale[0]) - out_w = int(self.input_shape[3] * self.scale[1]) - else: - out_h = self.out_h - out_w = self.out_w - - if self.shape_by_1Dtensor: - self.inputs["OutSize"] = self.out_size - elif self.out_size is not None: - size_tensor = [] - for index, ele in enumerate(self.out_size): - size_tensor.append(("x" + str(index), np.ones( - (1)).astype("int32") * ele)) - self.inputs["SizeTensor"] = size_tensor - - self.attrs["out_h"] = self.out_h - self.attrs["out_w"] = self.out_w - output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, - self.out_size, self.actual_shape, - self.align_corners) - self.outputs = {"Out": output_np} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place) - - def test_check_grad(self): - place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ["X"], "Out", in_place=True) - - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [2, 5, 4, 4] - self.out_h = 3 - self.out_w = 3 - self.scale = 0. - self.out_size = [3, 3] - self.align_corners = True - - -# out_size is a tensor list -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 3, 9, 6] - self.out_h = 12 - self.out_w = 12 - self.scale = 0. 
- self.out_size = [8, 12] - self.align_corners = True - - -# out_size is a 1-D tensor -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 0. - self.out_size = np.array([66, 40]).astype("int32") - self.align_corners = True - self.shape_by_1Dtensor = True - - -# scale is a 1-D tensor -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = "nearest" - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = 2.0 - self.out_size = None - self.align_corners = True - self.scale_by_1Dtensor = True - - -# scale is a 2-D tensor -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterp_attr_tensor_Case4(TestNearestInterpOp_attr_tensor): - def init_test_case(self): - self.interp_method = 'nearest' - self.input_shape = [3, 2, 32, 16] - self.out_h = 64 - self.out_w = 32 - self.scale = [2.0, 2.0] - self.out_size = None - self.align_corners = True - self.scale_by_2Dtensor = True - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestNearestInterpException(unittest.TestCase): - def test_exception(self): - input = paddle.static.data(name="input", shape=[1, 3, 6, 6], dtype="float32") - - def attr_data_format(): - # for 4-D input, data_format can only be NCHW or NHWC - out = base.layers.resize_nearest( - input, out_shape=[4, 8], data_format="NDHWC") - - def attr_scale_type(): - out = base.layers.resize_nearest(input, scale="scale") - - def attr_scale_value(): - out = base.layers.resize_nearest(input, scale=-0.3) - - self.assertRaises(ValueError, attr_data_format) - self.assertRaises(TypeError, attr_scale_type) - self.assertRaises(ValueError, attr_scale_value) -''' - -if __name__ == "__main__": - unittest.main() From 0fc0aa5d92fd9763dc41f40e0c387f12b3218b81 Mon Sep 17 00:00:00 2001 From: AyaseNana <49900969+NKNaN@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:20:52 +0800 Subject: [PATCH 11/16] [Typing][B-28] Add type annotations for `python/paddle/distribution/uniform.py` (#65660) --------- Co-authored-by: Nyakku Shigure --- python/paddle/distribution/uniform.py | 36 ++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index b9b4cf1e334803..cefbeef9c60433 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -11,8 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from typing import TYPE_CHECKING, Sequence import numpy as np +import numpy.typing as npt import paddle from paddle import _C_ops @@ -22,6 +26,9 @@ from paddle.framework import in_dynamic_mode from paddle.tensor import random +if TYPE_CHECKING: + from paddle import Tensor + class Uniform(distribution.Distribution): r"""Uniform distribution with `low` and `high` parameters. 
@@ -99,8 +106,25 @@ class Uniform(distribution.Distribution): Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, [0.50000000]) """ - - def __init__(self, low, high, name=None): + low: Tensor + high: Tensor + + def __init__( + self, + low: ( + float + | Sequence[float] + | npt.NDArray[np.float32 | np.float64] + | Tensor + ), + high: ( + float + | Sequence[float] + | npt.NDArray[np.float32 | np.float64] + | Tensor + ), + name: str | None = None, + ) -> None: if not in_dynamic_mode(): check_type( low, @@ -165,7 +189,7 @@ def __init__(self, low, high, name=None): super().__init__(self.low.shape) - def sample(self, shape, seed=0): + def sample(self, shape: list[int], seed: int = 0) -> Tensor: """Generate samples of the specified shape. Args: @@ -218,7 +242,7 @@ def sample(self, shape, seed=0): else: return output - def log_prob(self, value): + def log_prob(self, value: Tensor) -> Tensor: """Log probability density/mass function. Args: @@ -247,7 +271,7 @@ def log_prob(self, value): paddle.log(lb * ub), paddle.log(self.high - self.low), name=name ) - def probs(self, value): + def probs(self, value: Tensor) -> Tensor: """Probability density/mass function. Args: @@ -272,7 +296,7 @@ def probs(self, value): ub = paddle.cast(ub_bool, dtype=value.dtype) return paddle.divide((lb * ub), (self.high - self.low), name=name) - def entropy(self): + def entropy(self) -> Tensor: r"""Shannon entropy in nats. The entropy is From a72432c1885c048b01d68f453f77ad613838f93f Mon Sep 17 00:00:00 2001 From: ooo oo <106524776+ooooo-create@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:21:32 +0800 Subject: [PATCH 12/16] [Typing][A-100] Add type annotations for `python/paddle/io/dataloader/worker.py` (#65645) --------- Co-authored-by: SigureMo --- python/paddle/io/dataloader/worker.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/io/dataloader/worker.py b/python/paddle/io/dataloader/worker.py index a559a616bb2963..b1284a646f656c 100644 --- a/python/paddle/io/dataloader/worker.py +++ b/python/paddle/io/dataloader/worker.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import os import queue @@ -76,7 +77,7 @@ def is_alive(self): _worker_info = None -def get_worker_info(): +def get_worker_info() -> WorkerInfo | None: """ Get DataLoader worker process information function, this function is used to split data copy in worker process for IterableDataset @@ -117,8 +118,8 @@ def get_worker_info(): ... else: ... per_worker = int( ... math.ceil((self.end - self.start) / float( - ... worker_info.num_workers))) - ... worker_id = worker_info.id + ... worker_info.num_workers))) # type: ignore[attr-defined] + ... worker_id = worker_info.id # type: ignore[attr-defined] ... iter_start = self.start + worker_id * per_worker ... iter_end = min(iter_start + per_worker, self.end) ... 
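[Editorial note, not part of the patches above or below: a minimal, hedged sketch of the pattern that PATCH 12 annotates. It shows how the now-`WorkerInfo | None` return value of `paddle.io.get_worker_info()` is typically narrowed before its attributes are read; `RangeDataset`, its bounds, and the `__main__` driver are illustrative assumptions, and the per-line ignores mirror the ones added to the docstring in that patch, presumably needed because `WorkerInfo` assigns its attributes dynamically, so static checkers cannot see them.]

    # Hedged sketch: split an IterableDataset across DataLoader workers,
    # narrowing the `WorkerInfo | None` return value before using it.
    import math

    import paddle
    from paddle.io import DataLoader, IterableDataset, get_worker_info


    class RangeDataset(IterableDataset):  # illustrative dataset name
        def __init__(self, start: int, end: int) -> None:
            self.start = start
            self.end = end

        def __iter__(self):
            info = get_worker_info()
            if info is None:
                # Single-process loading: this iterator sees the whole range.
                iter_start, iter_end = self.start, self.end
            else:
                # Worker process: take only this worker's slice of the range.
                per_worker = int(
                    math.ceil((self.end - self.start) / float(info.num_workers))  # type: ignore[attr-defined]
                )
                iter_start = self.start + info.id * per_worker  # type: ignore[attr-defined]
                iter_end = min(iter_start + per_worker, self.end)
            for i in range(iter_start, iter_end):
                yield paddle.to_tensor([i])


    if __name__ == "__main__":
        for batch in DataLoader(RangeDataset(3, 9), num_workers=2, batch_size=1):
            print(batch)

The explicit `if info is None` branch keeps single-process loading working unchanged, which is why the annotation uses an optional return type rather than assuming a worker context.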
From 1b9663140806092791da3ae333e53211cdf67fdc Mon Sep 17 00:00:00 2001 From: megemini Date: Thu, 4 Jul 2024 15:25:21 +0800 Subject: [PATCH 13/16] =?UTF-8?q?[Typing]=20=E4=BF=AE=E7=90=86=E9=83=A8?= =?UTF-8?q?=E5=88=86=E7=A4=BA=E4=BE=8B=E4=B8=AD=E7=9A=84=E7=B1=BB=E5=9E=8B?= =?UTF-8?q?=E7=BC=BA=E5=A4=B1=20`var-annotated`=20=E4=BB=A5=E5=8F=8A?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=85=B7=E4=BD=93=20ignore=20=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=20(#65644)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/paddle/amp/debugging.py | 2 +- python/paddle/base/layers/math_op_patch.py | 2 +- python/paddle/distributed/communication/stream/gather.py | 2 +- python/paddle/distributed/parallel.py | 2 +- python/paddle/optimizer/lbfgs.py | 6 +++--- python/paddle/tensor/attribute.py | 2 +- python/paddle/tensor/creation.py | 2 +- python/paddle/vision/transforms/transforms.py | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/paddle/amp/debugging.py b/python/paddle/amp/debugging.py index 1b6e575cdbec98..8b4340a2c49359 100644 --- a/python/paddle/amp/debugging.py +++ b/python/paddle/amp/debugging.py @@ -88,7 +88,7 @@ def check_layer_numerics(func): ... return x @ self._w + self._b ... >>> dtype = 'float32' - >>> x = paddle.rand([10, 2, 2], dtype=dtype) # type: ignore + >>> x = paddle.rand([10, 2, 2], dtype=dtype) # type: ignore[arg-type] >>> model = MyLayer(dtype) >>> x[0] = float(0) >>> loss = model(x) diff --git a/python/paddle/base/layers/math_op_patch.py b/python/paddle/base/layers/math_op_patch.py index 1dd3c19b44d9aa..dfd4c802d89f96 100644 --- a/python/paddle/base/layers/math_op_patch.py +++ b/python/paddle/base/layers/math_op_patch.py @@ -352,7 +352,7 @@ def astype(self, dtype): >>> import paddle >>> import numpy as np - >>> x = np.ones([2, 2], np.float32) + >>> x = np.ones([2, 2], np.float32) # type: ignore[var-annotated] >>> with base.dygraph.guard(): ... original_variable = paddle.to_tensor(x) ... print("original var's dtype is: {}, numpy dtype is {}".format(original_variable.dtype, original_variable.numpy().dtype)) diff --git a/python/paddle/distributed/communication/stream/gather.py b/python/paddle/distributed/communication/stream/gather.py index c0405ec696bc0e..45b86b0215e0f8 100644 --- a/python/paddle/distributed/communication/stream/gather.py +++ b/python/paddle/distributed/communication/stream/gather.py @@ -83,7 +83,7 @@ def gather( >>> import paddle.distributed as dist >>> dist.init_parallel_env() - >>> gather_list = [] + >>> gather_list = [] # type: ignore[var-annotated] >>> if dist.get_rank() == 0: ... data = paddle.to_tensor([1, 2, 3]) ... dist.stream.gather(data, gather_list, dst=0) diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 0d905b4f5d9856..791f8834c37a62 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -334,7 +334,7 @@ class DataParallel(layers.Layer): ... model = paddle.DataParallel(model) ... opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters()) ... for step in range(10): - ... x_data = numpy.random.randn(2, 2).astype(numpy.float32) + ... x_data = numpy.random.randn(2, 2).astype(numpy.float32) # type: ignore[var-annotated] ... x = paddle.to_tensor(x_data) ... x.stop_gradient = False ... 
# step 1 : skip gradient synchronization by 'no_sync' diff --git a/python/paddle/optimizer/lbfgs.py b/python/paddle/optimizer/lbfgs.py index a0198048ecfea0..5a41e119f08bf6 100644 --- a/python/paddle/optimizer/lbfgs.py +++ b/python/paddle/optimizer/lbfgs.py @@ -399,10 +399,10 @@ class LBFGS(Optimizer): >>> paddle.disable_static() >>> np.random.seed(0) - >>> np_w = np.random.rand(1).astype(np.float32) # type: ignore - >>> np_x = np.random.rand(1).astype(np.float32) # type: ignore + >>> np_w = np.random.rand(1).astype(np.float32) # type: ignore[var-annotated] + >>> np_x = np.random.rand(1).astype(np.float32) # type: ignore[var-annotated] - >>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)] # type: ignore + >>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)] # type: ignore[var-annotated] >>> # y = 2x >>> targets = [2 * x for x in inputs] diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 2a0f4f5df2eed1..d4d35bcb1e05a6 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -102,7 +102,7 @@ def shape(input: Tensor) -> Tensor: >>> exe = paddle.static.Executor(paddle.CPUPlace()) >>> exe.run(paddle.static.default_startup_program()) - >>> img = np.ones((3, 100, 100)).astype(np.float32) # type: ignore + >>> img = np.ones((3, 100, 100)).astype(np.float32) # type: ignore[var-annotated] >>> res = exe.run(paddle.static.default_main_program(), feed={'x':img}, fetch_list=[output]) >>> print(res) diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 506525d1e2e49c..8e6635a641f623 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -2483,7 +2483,7 @@ def assign(x: TensorLike, output: paddle.Tensor | None = None) -> paddle.Tensor: [2.5 2.5]] >>> array = np.array([[1, 1], [3, 4], [1, 3]]).astype( ... np.int64 - ... ) # type: ignore + ... ) # type: ignore[var-annotated] >>> result1 = paddle.zeros(shape=[3, 3], dtype='float32') >>> paddle.assign(array, result1) >>> print(result1.numpy()) diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index bf5fc470e87f3b..2e25cbc76e1643 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -232,7 +232,7 @@ class BaseTransform(_Transform[_InputT, _RetT]): ... else: ... raise TypeError("Unexpected type {}".format(type(img))) ... - >>> class CustomRandomFlip(BaseTransform): # type: ignore + >>> class CustomRandomFlip(BaseTransform): # type: ignore[type-arg] ... def __init__(self, prob=0.5, keys=None): ... super().__init__(keys) ... 
self.prob = prob From c517ecd766a2ebcf774f572c879b084a810c4f6d Mon Sep 17 00:00:00 2001 From: Qi Li Date: Thu, 4 Jul 2024 15:26:25 +0800 Subject: [PATCH 14/16] [Dockerfile][DCU][XPU] add develop dockerfile for dcu and xpu (#65654) * [DCU][XPU] add develop dockerfile for dcu and xpu * update comments --- tools/dockerfile/Dockerfile.develop.dtk | 108 +++++++++++++++++ tools/dockerfile/Dockerfile.develop.xre | 113 +++++++++++++++++ tools/dockerfile/Dockerfile.rocm | 153 ------------------------ 3 files changed, 221 insertions(+), 153 deletions(-) create mode 100644 tools/dockerfile/Dockerfile.develop.dtk create mode 100644 tools/dockerfile/Dockerfile.develop.xre delete mode 100644 tools/dockerfile/Dockerfile.rocm diff --git a/tools/dockerfile/Dockerfile.develop.dtk b/tools/dockerfile/Dockerfile.develop.dtk new file mode 100644 index 00000000000000..20a7390f38de63 --- /dev/null +++ b/tools/dockerfile/Dockerfile.develop.dtk @@ -0,0 +1,108 @@ +# Docker Image for PaddlePaddle Hygon DCU2 + +FROM sugonhub/kylin:v10-dev +LABEL maintainer="PaddlePaddle Authors " + +RUN yum install -y bzip2-devel openssh-server elfutils-devel diffutils libtool iproute \ + blas-devel lapack-devel make git patch unzip bison hostname yasm libsndfile-devel \ + automake which file net-tools zlib-devel libffi-devel vim tk-devel tkinter rpm-build \ + sqlite-devel xz-devel wget curl-devel initscripts mesa-libGL numactl-devel pcre-devel \ + openssl-devel libjpeg-turbo-devel libpng-devel ninja-build pciutils libzstd-devel \ + gcc gcc-c++ gcc-gfortran + +# workdir +WORKDIR /opt + +# cmake 3.27.7 +RUN wget -q https://cmake.org/files/v3.27/cmake-3.27.7-linux-x86_64.sh && \ + chmod +x cmake-3.27.7-linux-x86_64.sh && mkdir -p /opt/cmake-3.27.7 && \ + ./cmake-3.27.7-linux-x86_64.sh --prefix=/opt/cmake-3.27.7 --skip-license && \ + rm -rf cmake-3.27.7-linux-x86_64.sh && rm -rf /opt/cmake +RUN rm -rf /usr/bin/cmake /usr/bin/cmake3 && \ + ln -s /opt/cmake-3.27.7/bin/cmake /usr/bin/cmake && + ln -s /opt/cmake-3.27.7/bin/cmake /usr/bin/cmake3 +ENV PATH=/opt/cmake-3.27.7/bin:${PATH} + +# Python 3.10.14 +RUN wget -q https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz && \ + tar xzf Python-3.10.14.tgz && cd Python-3.10.14 && \ + CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \ + make -j16 > /dev/null && make altinstall > /dev/null && ldconfig && \ + cd ../ && rm -rf Python-3.10.14 && rm -rf Python-3.10.14.tgz +ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} +ENV CPLUS_INCLUDE_PATH=/usr/local/include/python3.10:${CPLUS_INCLUDE_PATH} + +# create venv and activate +RUN /usr/local/bin/python3.10 -m venv /opt/py310 +# update env +ENV PATH=/opt/py310/bin:$PATH +RUN echo "source /opt/py310/bin/activate" >> /root/.bashrc +# upgrade pip +RUN pip install --upgrade pip setuptools wheel + +# install pylint and pre-commit +RUN pip install pre-commit==2.17.0 pylint pytest astroid isort coverage qtconsole distro +RUN pip install attrs pyyaml pathlib2 scipy requests psutil Cython clang-format==13.0.0 PyGithub + +# install Paddle requirement +RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O requirements.txt && \ + pip install -r requirements.txt && rm -rf requirements.txt +RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/unittest_py/requirements.txt -O requirements.txt && \ + pip install -r requirements.txt && rm -rf requirements.txt + +# git credential to skip password typing +RUN git config --global credential.helper store && \ + 
git config --global pull.rebase false + +# Fix locales to en_US.UTF-8 +RUN yum -y install glibc-locale-source glibc-langpack-en +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +# patchelf 0.14.5 - https://github.com/NixOS/patchelf/pull/216 +RUN wget -q https://github.com/NixOS/patchelf/archive/refs/tags/0.14.5.tar.gz && \ + tar xzf 0.14.5.tar.gz && cd patchelf-0.14.5 && \ + ./bootstrap.sh > /dev/null && ./configure > /dev/null && \ + make -j16 > /dev/null && make install > /dev/null && \ + cd .. && rm -rf patchelf-0.14.5 && rm -rf 0.14.5.tar.gz + +# ccache 4.6.3 +RUN wget -q https://github.com/ccache/ccache/releases/download/v4.6.3/ccache-4.6.3.tar.gz && \ + tar xf ccache-4.6.3.tar.gz && mkdir /usr/local/ccache-4.6.3 && cd ccache-4.6.3 && \ + mkdir build && cd build && \ + cmake -DCMAKE_BUILD_TYPE=Release -DREDIS_STORAGE_BACKEND=OFF \ + -DCMAKE_INSTALL_PREFIX=/usr/local/ccache-4.6.3 .. > /dev/null && \ + make -j16 > /dev/null && make install > /dev/null && \ + cd ../../ && rm -rf ccache-4.6.3.tar.gz && rm -rf ccache-4.6.3 && \ + ln -s /usr/local/ccache-4.6.3/bin/ccache /usr/local/bin/ccache +ENV CCACHE_MAXSIZE=50G \ + CCACHE_LIMIT_MULTIPLE=0.8 \ + CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime + +# configure ssh +RUN sed -i "s/^#PermitRootLogin/PermitRootLogin/" /etc/ssh/sshd_config && \ + sed -i "s/^#PubkeyAuthentication/PubkeyAuthentication/" /etc/ssh/sshd_config && \ + sed -i "s/^#RSAAuthentication/RSAAuthentication/" /etc/ssh/sshd_config && \ + sed -i "s/#UseDNS .*/UseDNS no/" /etc/ssh/sshd_config +RUN ssh-keygen -A + +# yum clean +RUN yum clean all && \ + rm -rf /var/cache/yum && \ + rm -rf /var/lib/yum/yumdb && \ + rm -rf /var/lib/yum/history + +# Install DTK +RUN wget -q https://cancon.hpccube.com:65024/file/1/DTK-24.04.1/CentOS7.6/DTK-24.04.1-CentOS7.6-x86_64.tar.gz && \ + tar zxf DTK-24.04.1-CentOS7.6-x86_64.tar.gz && rm -rf DTK-24.04.1-CentOS7.6-x86_64.tar.gz +# Replace if you use other device type, e.g. 
Z100, Z100L, K100 +RUN wget -q https://paddle-device.bj.bcebos.com/dcu/hyhal-K100AI.tar.gz && \ + tar zxf hyhal-K100AI.tar.gz && rm -rf hyhal-K100AI.tar.gz +RUN echo "source /opt/dtk-24.04.1/env.sh" >> /root/.bashrc +# Disable compile warnings +RUN sed -i '74d' /opt/dtk-24.04.1/include/rocrand/rocrand_common.h + +# generate core dump +RUN echo "kernel.core_pattern=core_%e_%p_%t" >> /etc/sysctl.conf && \ + echo "kernel.core_uses_pid=0" >> /etc/sysctl.conf + +EXPOSE 22 diff --git a/tools/dockerfile/Dockerfile.develop.xre b/tools/dockerfile/Dockerfile.develop.xre new file mode 100644 index 00000000000000..b8913ad376a41f --- /dev/null +++ b/tools/dockerfile/Dockerfile.develop.xre @@ -0,0 +1,113 @@ +# Docker Image for PaddlePaddle Kunlun XPU + +FROM ubuntu:20.04 +LABEL maintainer="PaddlePaddle Authors " + +RUN apt-get update && apt-get install -y apt-utils +RUN ln -snf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata +RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && add-apt-repository ppa:ubuntu-toolchain-r/test +RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar ntp xz-utils libssl-dev bzip2 gzip make automake \ + coreutils language-pack-zh-hans libsm6 libxext6 libxrender-dev libgl1-mesa-glx libsqlite3-dev libopenblas-dev liblapack3 \ + bison libjpeg-dev zlib1g zlib1g-dev swig locales net-tools libtool numactl libnuma-dev liblzma-dev libbz2-dev libblas-dev \ + openssl openssh-server libffi-dev pciutils libblas3 liblapack-dev libzstd-dev default-jre libgcc-s1 gcc g++ gfortran gdb + +# workdir +WORKDIR /opt + +# GCC 8.4 +RUN apt-get install -y gcc-8 g++-8 gfortran-8 +RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 90 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 90 && \ + update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 90 + +# cmake 3.27.7 +RUN wget -q https://cmake.org/files/v3.27/cmake-3.27.7-linux-x86_64.sh && \ + chmod +x cmake-3.27.7-linux-x86_64.sh && mkdir -p /opt/cmake-3.27.7 && \ + ./cmake-3.27.7-linux-x86_64.sh --prefix=/opt/cmake-3.27.7 --skip-license && \ + rm -rf cmake-3.27.7-linux-x86_64.sh +ENV PATH=/opt/cmake-3.27.7/bin:${PATH} + +# default python version +ARG PY_VERSION=3.10 +RUN apt-get install -y python3-distutils python${PY_VERSION} python${PY_VERSION}-dev + +# install pip +RUN curl -s -q https://bootstrap.pypa.io/get-pip.py | /usr/bin/python${PY_VERSION} + +# set default python +RUN rm -rf /usr/bin/python3 && ln -s /usr/bin/python${PY_VERSION} /usr/bin/python3 && \ + rm -rf /usr/bin/python && ln -s /usr/bin/python${PY_VERSION} /usr/bin/python + +# install pylint and pre-commit +RUN pip install pre-commit==2.17.0 pylint pytest astroid isort coverage qtconsole distro +RUN pip install attrs pyyaml pathlib2 scipy requests psutil Cython clang-format==13.0.0 + +# add more libs +RUN apt-get update && apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev lsof libgeos-dev \ + pkg-config libhdf5-103 libhdf5-dev lrzsz libsndfile1 tree ninja-build -y + +# install Paddle requirement +RUN wget --no-check-certificate https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O requirements.txt && \ + pip install -r requirements.txt -i https://pip.baidu-int.com/simple --trusted-host pip.baidu-int.com && rm -rf requirements.txt +RUN wget --no-check-certificate 
https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/unittest_py/requirements.txt -O requirements.txt && \ + pip install -r requirements.txt -i https://pip.baidu-int.com/simple --trusted-host pip.baidu-int.com && rm -rf requirements.txt + +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +# patchelf 0.14.5 - https://github.com/NixOS/patchelf/pull/216 +RUN wget -q --no-check-certificate https://github.com/NixOS/patchelf/archive/refs/tags/0.14.5.tar.gz && \ + tar xzf 0.14.5.tar.gz && cd patchelf-0.14.5 && \ + ./bootstrap.sh > /dev/null && ./configure > /dev/null && \ + make -j16 > /dev/null && make install > /dev/null && \ + cd .. && rm -rf patchelf-0.14.5 && rm -rf 0.14.5.tar.gz + +# ccache 4.6.3 +RUN wget -q https://github.com/ccache/ccache/releases/download/v4.6.3/ccache-4.6.3.tar.gz && \ + tar xf ccache-4.6.3.tar.gz && mkdir /usr/local/ccache-4.6.3 && cd ccache-4.6.3 && \ + mkdir build && cd build && \ + cmake -DCMAKE_BUILD_TYPE=Release -DREDIS_STORAGE_BACKEND=OFF \ + -DCMAKE_INSTALL_PREFIX=/usr/local/ccache-4.6.3 .. > /dev/null && \ + make -j16 > /dev/null && make install > /dev/null && \ + cd ../../ && rm -rf ccache-4.6.3.tar.gz && rm -rf ccache-4.6.3 && \ + ln -s /usr/local/ccache-4.6.3/bin/ccache /usr/local/bin/ccache +ENV CCACHE_MAXSIZE=80G \ + CCACHE_LIMIT_MULTIPLE=0.8 \ + CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime + +# Install XRE 4.31.0 +ARG XRE_VERSION=4.31.0 +ARG XRE_INSTALL=/usr/local/xpu-${XRE_VERSION} +RUN wget -q https://klx-sdk-release-public.su.bcebos.com/xre/release/${XRE_VERSION}.1/xre-ubuntu_2004_x86_64.tar.gz && \ + tar -zxf xre-ubuntu_2004_x86_64.tar.gz && \ + mkdir -p ${XRE_INSTALL} && \ + cp -af /opt/xre-ubuntu_2004_x86_64/bin/ ${XRE_INSTALL}/ && \ + cp -af /opt/xre-ubuntu_2004_x86_64/include/ ${XRE_INSTALL}/ && \ + cp -af /opt/xre-ubuntu_2004_x86_64/tools/ ${XRE_INSTALL}/ && \ + cp -af /opt/xre-ubuntu_2004_x86_64/version.txt ${XRE_INSTALL}/ && \ + mkdir -p ${XRE_INSTALL}/lib64 && \ + cp -af /opt/xre-ubuntu_2004_x86_64/lib/* ${XRE_INSTALL}/lib64/ && \ + cp -af /opt/xre-ubuntu_2004_x86_64/so/* ${XRE_INSTALL}/lib64/ && \ + ln -sf ${XRE_INSTALL} /usr/local/xpu && \ + ln -sf ${XRE_INSTALL}/bin/xpu_smi /usr/local/bin/xpu_smi && \ + rm -rf xre-ubuntu_2004_x86_64.tar.gz && rm -rf xre-ubuntu_2004_x86_64/ +ENV PATH=${XRE_INSTALL}/bin:$PATH + +# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service +RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && \ + sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config +CMD source ~/.bashrc + +# /proc/sys/kernel/core_pattern +RUN mkdir -p /var/core + +# Clean +RUN apt-get clean -y +RUN pip cache purge + +EXPOSE 22 diff --git a/tools/dockerfile/Dockerfile.rocm b/tools/dockerfile/Dockerfile.rocm deleted file mode 100644 index 9b3e6c4b2f123d..00000000000000 --- a/tools/dockerfile/Dockerfile.rocm +++ /dev/null @@ -1,153 +0,0 @@ -# A image for building paddle binaries -# Use rocm-terminal base image for both rocm environment -# When you modify it, please be aware of rocm version -# -# Build: ROCM 4.0.1 -# cd Paddle/tools/dockerfile -# docker build -f Dockerfile.rocm \ -# -t paddlepaddle/paddle-centos-rocm401-dev:latest . 
-# -# docker run -it --device=/dev/kfd --device=/dev/dri \ -# --security-opt seccomp=unconfined --group-add video \ -# paddlepaddle/paddle-centos-rocm401-dev:latest /bin/bash - -FROM centos:7.8.2003 -MAINTAINER PaddlePaddle Authors - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 - -RUN yum install -y epel-release deltarpm sudo openssh-server gettext-devel sqlite-devel \ - zlib-devel openssl-devel pcre-devel vim tk-devel tkinter libtool xz graphviz wget curl-devel \ - make bzip2 git patch unzip bison yasm diffutils automake which file kernel-headers kernel-devel \ - net-tools numactl-devel chrpath screen initscripts - -# Install devtoolset-7 -RUN yum install -y yum-utils centos-release-scl && \ - yum-config-manager --enable rhel-server-rhscl-7-rpms && \ - yum-config-manager --enable rhel-7-server-rpms && \ - yum-config-manager --enable rhel-7-server-optional-rpms && \ - INSTALL_PKGS="devtoolset-7-binutils devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-gdb" && \ - yum install -y --setopt=tsflags=nodocs $INSTALL_PKGS && \ - rpm -V $INSTALL_PKGS && \ - yum -y clean all --enablerepo='*' -ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH -ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH -RUN echo "source scl_source enable devtoolset-7" > "/etc/profile.d/devtoolset-7.sh" - -# cmake 3.16.0 -WORKDIR /opt -RUN wget -q https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.tar.gz && \ - tar -zxvf cmake-3.18.0-Linux-x86_64.tar.gz && rm cmake-3.18.0-Linux-x86_64.tar.gz && \ - mv cmake-3.18.0-Linux-x86_64 cmake-3.16 -ENV PATH=/opt/cmake-3.18/bin:${PATH} - -# ROCM -RUN yum install -y kmod wget openblas-devel epel-release -RUN echo "[ROCm]" > /etc/yum.repos.d/rocm.repo && \ - echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo && \ - echo "baseurl=http://repo.radeon.com/rocm/yum/4.0.1" >> /etc/yum.repos.d/rocm.repo && \ - echo "enabled=1" >> /etc/yum.repos.d/rocm.repo && \ - echo "gpgcheck=0" >> /etc/yum.repos.d/rocm.repo -RUN yum install -y rocm-dev rocm-utils rocfft miopen-hip rocblas hipsparse rocrand rccl hipcub rocthrust rocprofiler-dev roctracer-dev -# fix rocthrust -RUN sed -i '21 a #include ' /opt/rocm/include/thrust/system/hip/detail/error.inl -# export ROCM env -ENV ROCM_PATH=/opt/rocm -ENV HIP_PATH=/opt/rocm/hip -ENV HIP_CLANG_PATH=/opt/rocm/llvm/bin -ENV PATH=/opt/rocm/bin:$PATH -ENV PATH=/opt/rocm/opencl/bin:$PATH -ENV LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH - -# git 2.17.1 -RUN cd /opt && wget -q https://paddle-ci.gz.bcebos.com/git-2.17.1.tar.gz && \ - tar -xvf git-2.17.1.tar.gz && \ - cd git-2.17.1 && \ - ./configure --with-openssl --prefix=/usr/local && \ - make -j8 && make install && \ - cd .. && rm -rf git-2.17.1.tar.gz && rm -rf git-2.17.1 - -ENV GOROOT=/usr/local/go -ENV GOPATH=/root/gopath -ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH} - -# go 1.8.1 -RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \ - tar -xz -C /usr/local && \ - mkdir /root/gopath && \ - mkdir /root/gopath/bin && \ - mkdir /root/gopath/src - -# protobuf 3.6.1 -RUN cd /opt && wget -q --no-check-certificate https://paddle-ci.cdn.bcebos.com/protobuf-cpp-3.6.1.tar.gz && \ - tar xzf protobuf-cpp-3.6.1.tar.gz && \ - cd protobuf-3.6.1 && ./configure && make -j4 && make install && \ - cd .. 
&& rm -f protobuf-cpp-3.6.1.tar.gz && rm -rf protobuf-3.6.1 - -# conda -ENV CONDA_FILE=Miniconda3-py38_23.10.0-1-Linux-x86_64.sh -RUN cd /opt && wget https://repo.anaconda.com/miniconda/${CONDA_FILE} && chmod +x ${CONDA_FILE} -RUN mkdir /opt/conda && ./${CONDA_FILE} -b -f -p "/opt/conda" && rm -rf ${CONDA_FILE} -ENV PATH=/opt/conda/bin:${PATH} -RUN conda init bash && conda install -n base jupyter jupyterlab - -# install pytest and pre-commit -RUN /opt/conda/bin/pip install pre-commit pytest protocol PyGithub - -# install Paddle requirement -RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt -RUN /opt/conda/bin/pip install -r /root/requirements.txt && \ - rm -rf /root/requirements.txt - -RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/unittest_py/requirements.txt -O /root/requirements.txt -RUN /opt/conda/bin/pip install -r /root/requirements.txt && rm -rf /root/requirements.txt - -# install PaddleClas requirement -RUN wget https://raw.githubusercontent.com/PaddlePaddle/PaddleClas/develop/requirements.txt -O /root/requirements.txt -RUN /opt/conda/bin/pip install -r /root/requirements.txt && rm -rf /root/requirements.txt - -# install PaddleDetection requirement -RUN wget https://raw.githubusercontent.com/PaddlePaddle/PaddleDetection/develop/requirements.txt -O /root/requirements.txt -RUN /opt/conda/bin/pip install -r /root/requirements.txt && rm -rf /root/requirements.txt - -# configure ssh -RUN sed -i "s/^#PermitRootLogin/PermitRootLogin/" /etc/ssh/sshd_config && \ - sed -i "s/^#PubkeyAuthentication/PubkeyAuthentication/" /etc/ssh/sshd_config && \ - sed -i "s/^#RSAAuthentication/RSAAuthentication/" /etc/ssh/sshd_config - -# clang-format 3.8 -RUN wget https://copr.fedorainfracloud.org/coprs/alonid/llvm-3.8.0/repo/epel-7/alonid-llvm-3.8.0-epel-7.repo -P /etc/yum.repos.d/ -RUN yum install -y clang-3.8.0 -ENV PATH=/opt/llvm-3.8.0/bin:${PATH} - -# patchelf -RUN yum install -y patchelf && \ - yum clean all && \ - rm -rf /var/cache/yum && \ - rm -rf /var/lib/yum/yumdb && \ - rm -rf /var/lib/yum/history - -# swig 2.0.12 -RUN wget -O /opt/swig-2.0.12.tar.gz https://sourceforge.net/projects/swig/files/swig/swig-2.0.12/swig-2.0.12.tar.gz/download && \ - cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && \ - cd /opt && rm swig-2.0.12.tar.gz && rm -rf swig-2.0.12 - -# ccache 3.7.9 -RUN cd /opt && wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ - tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \ - ./configure -prefix=/usr/local/ccache-3.7.9 && \ - make -j8 && make install && \ - ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache && \ - cd .. 
&& rm -rf ccache-3.7.9.tar.gz && rm -rf ccache-3.7.9 - -# configure ssh -RUN sed -i "s/^#PermitRootLogin/PermitRootLogin/" /etc/ssh/sshd_config && \ - sed -i "s/^#PubkeyAuthentication/PubkeyAuthentication/" /etc/ssh/sshd_config && \ - sed -i "s/^#RSAAuthentication/RSAAuthentication/" /etc/ssh/sshd_config && \ - sed -i "s/#UseDNS .*/UseDNS no/" /etc/ssh/sshd_config - -RUN ssh-keygen -A - -EXPOSE 22 From 9ce1226330b935c8b4ab7353bf01f089ef497c6d Mon Sep 17 00:00:00 2001 From: WangZhen <23097963+0x45f@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:37:59 +0800 Subject: [PATCH 15/16] [CINN]Fix AddNOpInferSymbolicShape check (#65672) --- .../fluid/pir/dialect/operator/ir/manual_op.cc | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc index 3e81ae4cfe69f5..0f08f02fa2a6e9 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc @@ -252,7 +252,6 @@ bool AddNOpInferSymbolicShape(pir::Operation *op, "should be larger than 0. But received X's dimensions %d.", inputs_shape.size())); symbol::TensorShapeOrDataDimExprs candidate_shape = inputs_shape.front(); - size_t candidate_idx = 0; for (size_t i = 1; i < inputs_shape.size(); ++i) { // 0D tensor if (inputs_shape[i].shape().size() == 0) { @@ -260,19 +259,12 @@ bool AddNOpInferSymbolicShape(pir::Operation *op, } if (candidate_shape.shape().size() == 0) { candidate_shape = inputs_shape[i]; - candidate_idx = i; continue; } - PADDLE_ENFORCE_EQ(candidate_shape, - inputs_shape[i], - common::errors::InvalidArgument( - "The input tensor X of AddNOp must" - " have same shape. But received X[%d]'s shape = " - "[%s], X[%d]'s shape = [%s].", - candidate_idx, - candidate_shape, - i, - inputs_shape[i])); + for (size_t j = 0; j < candidate_shape.shape().size(); ++j) { + infer_context->AddEqualCstr(candidate_shape.shape()[j], + inputs_shape[i].shape()[j]); + } } infer_context->SetShapeOrDataForValue( op->result(0), symbol::ShapeOrDataDimExprs{candidate_shape}); From 7f88ea356b675ece47ce8b2b46336214881fcdcb Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 4 Jul 2024 15:43:09 +0800 Subject: [PATCH 16/16] update ci case for auto_parallel, change model from gpt to llama (#65676) --- tools/auto_parallel/ci_auto_parallel.sh | 32 ++++-------------------- tools/auto_parallel/target_path_lists.sh | 7 +----- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/tools/auto_parallel/ci_auto_parallel.sh b/tools/auto_parallel/ci_auto_parallel.sh index 2fbb47ec371124..6145eaf42e9169 100644 --- a/tools/auto_parallel/ci_auto_parallel.sh +++ b/tools/auto_parallel/ci_auto_parallel.sh @@ -68,7 +68,7 @@ for file_name in `git diff --numstat upstream/${AGILE_COMPILE_BRANCH} |awk '{pri # while the other tests of llama model will be executed in PR-CI-Auto-Parallel. 
for ((i=0; i<${#target_lists_for_semi_auto_ci[@]}; i++)); do if [[ $i != ${test_auto_num} ]] && [[ ${file_item} == *${target_lists_for_semi_auto_ci[i]}* ]];then - case_list[${#case_list[*]}]=gpt-3_auto + case_list[${#case_list[*]}]=llama_auto case_list[${#case_list[*]}]="llama_auto_unit_test" break elif [[ $i == ${test_auto_num} ]] && [[ ${file_item} == *${target_lists_for_semi_auto_ci[i]}* ]];then @@ -78,14 +78,6 @@ for file_name in `git diff --numstat upstream/${AGILE_COMPILE_BRANCH} |awk '{pri continue fi done - for ((i=0; i<${#target_lists_for_pir_ci[@]}; i++)); do - if [[ ${file_item} == *${target_lists_for_pir_ci[i]}* ]];then - case_list[${#case_list[*]}]=gpt-3_auto_pir - break - else - continue - fi - done # The dynamic unittests have been monitored in PR-CI-Distribute-stable # and will be no longer redundantly executed in PR-CI-Auto-Parallel. for ((i=0; i<${#target_lists_for_dygraph_ci[@]}; i++)); do @@ -120,14 +112,6 @@ fi get_diff_TO_case # Remove duplicates and store the results back to the original list -#################### -if [[ "${case_list[*]}" == *"gpt-3_auto"* ]] && [[ "${case_list[*]}" == *"gpt-3_auto_pir"* ]]; then - echo "同时命中gpt-3_auto 和 gpt-3_auto_pir, 只执行新ir, 不执行旧ir" - case_list=("${case_list[@]/*gpt-3_auto_pir*/}") - case_list=("${case_list[@]/*gpt-3_auto*/}") - case_list[${#case_list[*]}]=gpt-3_auto_pir - echo ${case_list[*]} -fi #################### case_list=($(awk -v RS=' ' '!a[$1]++' <<< ${case_list[*]})) if [[ ${#case_list[*]} -ne 0 ]];then @@ -142,17 +126,11 @@ if [[ ${#case_list[*]} -ne 0 ]];then export FLAGS_install_deps=0 for case in ${case_list[*]};do echo -e "\033[31m ---- running case $case_num/${#case_list[*]}: ${case} \033" - if [[ ${case} == "gpt-3_auto" ]];then - bash /workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh gpt_case_list_auto $FLAGS_install_deps $FLAGS_download_data - print_info $? `ls -lt ${log_path} | grep "gpt" | grep -v "pir" | head -n 1 | awk '{print $9}'` ${case} - export FLAGS_install_deps=1 - export FLAGS_download_data="gpt ""$FLAGS_download_data" - let case_num++ - elif [[ ${case} == "gpt-3_auto_pir" ]];then - bash /workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh gpt_case_list_auto_pir $FLAGS_install_deps $FLAGS_download_data - print_info $? `ls -lt ${log_path} | grep "pir" | head -n 1 | awk '{print $9}'` ${case} + if [[ ${case} == "llama_auto" ]];then + bash /workspace/PaddleNLP/scripts/distribute/ci_case_auto.sh llama_case_list_auto $FLAGS_install_deps $FLAGS_download_data + print_info $? `ls -lt ${log_path} | grep "llama" | head -n 1 | awk '{print $9}'` ${case} export FLAGS_install_deps=1 - export FLAGS_download_data="gpt ""$FLAGS_download_data" + export FLAGS_download_data="llama ""$FLAGS_download_data" let case_num++ elif [[ ${case} == "auto_unit_test" ]];then bash /workspace/Paddle/tools/auto_parallel/ci_case_unit.sh auto_unit_test diff --git a/tools/auto_parallel/target_path_lists.sh b/tools/auto_parallel/target_path_lists.sh index 033479e7d9a576..fdf5419aafb053 100644 --- a/tools/auto_parallel/target_path_lists.sh +++ b/tools/auto_parallel/target_path_lists.sh @@ -10,7 +10,7 @@ # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# See the License for the specific language governing permissions and # limitations under the License. 
target_lists_for_semi_auto_ci=( @@ -25,15 +25,10 @@ target_lists_for_semi_auto_ci=( "paddle/phi/api/generator/dist_bw_api_gen.py" "tools/auto_parallel/target_path_lists.sh" "test/auto_parallel" -) - -target_lists_for_pir_ci=( - "paddle/fluid/framework/new_executor" "paddle/fluid/ir_adaptor/" "paddle/fluid/pir/dialect" "paddle/fluid/pir/transforms" "paddle/pir" - "tools/auto_parallel/target_path_lists.sh" ) target_lists_for_dygraph_ci=(