From a8afed69971884f65546adc33f3d9a5cbd54a44e Mon Sep 17 00:00:00 2001 From: liutiexing <74819124+liutiexing@users.noreply.github.com> Date: Mon, 10 Jan 2022 15:30:23 +0800 Subject: [PATCH] Profiler skeleton (#38826) * add align for WorkQueue * add spinlock * merge develop * merge * Add EventsWaiter * Revert "Add EventsWaiter" This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2. * profiler skeleton * update * update * update Co-authored-by: liutiexing --- .../new_executor/workqueue/CMakeLists.txt | 3 +- .../new_executor/workqueue/workqueue.cc | 4 +- paddle/fluid/platform/CMakeLists.txt | 3 +- paddle/fluid/platform/event.h | 34 ---------- paddle/fluid/platform/profiler.cc | 2 +- paddle/fluid/platform/profiler.h | 2 +- paddle/fluid/platform/profiler/CMakeLists.txt | 1 + .../platform/{ => profiler}/event_tracing.h | 0 .../{ => profiler}/host_event_recorder.cc | 4 +- .../{ => profiler}/host_event_recorder.h | 38 ++++++++++- .../platform/profiler/trace_event_collector.h | 65 +++++++++++++++++++ paddle/fluid/platform/profiler/tracer_base.h | 42 ++++++++++++ 12 files changed, 154 insertions(+), 44 deletions(-) create mode 100644 paddle/fluid/platform/profiler/CMakeLists.txt rename paddle/fluid/platform/{ => profiler}/event_tracing.h (100%) rename paddle/fluid/platform/{ => profiler}/host_event_recorder.cc (93%) rename paddle/fluid/platform/{ => profiler}/host_event_recorder.h (84%) create mode 100644 paddle/fluid/platform/profiler/trace_event_collector.h create mode 100644 paddle/fluid/platform/profiler/tracer_base.h diff --git a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt index 77130102d52e5..f47a274aaa4e5 100644 --- a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt @@ -1,2 +1,3 @@ -cc_library(workqueue SRCS workqueue.cc workqueue_utils.cc events_waiter.cc DEPS enforce glog) +cc_library(workqueue_utils 
SRCS workqueue_utils.cc events_waiter.cc DEPS enforce glog) +cc_library(workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog) cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue) diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc index 3f06f3db23118..45694349168a4 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc @@ -198,7 +198,7 @@ std::unique_ptr CreateMultiThreadedWorkQueue( "WorkQueueOptions.num_threads must be " "greater than 1.")); std::unique_ptr ptr(new WorkQueueImpl(options)); - return std::move(ptr); + return ptr; } std::unique_ptr CreateWorkQueueGroup( @@ -208,7 +208,7 @@ std::unique_ptr CreateWorkQueueGroup( "For a WorkQueueGroup, the number of WorkQueueOptions " "must be greater than 1.")); std::unique_ptr ptr(new WorkQueueGroupImpl(queues_options)); - return std::move(ptr); + return ptr; } } // namespace framework diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 1031d1ed6357d..8a84429987d90 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -169,7 +169,8 @@ cc_test(timer_test SRCS timer_test.cc DEPS timer) cc_library(lodtensor_printer SRCS lodtensor_printer.cc DEPS ddim place tensor scope lod_tensor variable_helper framework_proto) cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_printer) -cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info) +add_subdirectory(profiler) + cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) if(WITH_GPU) nv_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce dynload_cuda) diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h index 919266575e6ce..da5080cc86f0c 100644 --- 
a/paddle/fluid/platform/event.h +++ b/paddle/fluid/platform/event.h @@ -201,39 +201,5 @@ class CudaEvent { #endif }; -struct CommonEvent { - public: - CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns, - EventRole role) - : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {} - - CommonEvent(std::function &arena_allocator, - const std::string &name_str, uint64_t start_ns, uint64_t end_ns, - EventRole role, const std::string &attr_str) - : start_ns(start_ns), end_ns(end_ns), role(role) { - auto buf = static_cast(arena_allocator(name_str.length() + 1)); - strncpy(buf, name_str.c_str(), name_str.length() + 1); - name = buf; - buf = static_cast(arena_allocator(attr_str.length() + 1)); - strncpy(buf, attr_str.c_str(), attr_str.length() + 1); - attr = buf; - } - - CommonEvent(const std::function &arena_allocator, - const std::string &name_str, uint64_t start_ns, uint64_t end_ns, - EventRole role) - : start_ns(start_ns), end_ns(end_ns), role(role) { - auto buf = static_cast(arena_allocator(name_str.length() + 1)); - strncpy(buf, name_str.c_str(), name_str.length() + 1); - name = buf; - } - - const char *name = nullptr; // not owned, designed for performance - uint64_t start_ns = 0; - uint64_t end_ns = 0; - EventRole role = EventRole::kOrdinary; - const char *attr = nullptr; // not owned, designed for performance -}; - } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index eaa77273c8fd4..c4beac93ef134 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -20,8 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/host_event_recorder.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/host_event_recorder.h" #include "paddle/fluid/platform/profiler_helper.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/dynload/nvtx.h" diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index 41cc3805f44da..122e19b7c2808 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -27,9 +27,9 @@ limitations under the License. */ #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/event.h" -#include "paddle/fluid/platform/event_tracing.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.pb.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/device/gpu/gpu_info.h" #endif diff --git a/paddle/fluid/platform/profiler/CMakeLists.txt b/paddle/fluid/platform/profiler/CMakeLists.txt new file mode 100644 index 0000000000000..de22183df6034 --- /dev/null +++ b/paddle/fluid/platform/profiler/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info) diff --git a/paddle/fluid/platform/event_tracing.h b/paddle/fluid/platform/profiler/event_tracing.h similarity index 100% rename from paddle/fluid/platform/event_tracing.h rename to paddle/fluid/platform/profiler/event_tracing.h diff --git a/paddle/fluid/platform/host_event_recorder.cc b/paddle/fluid/platform/profiler/host_event_recorder.cc similarity index 93% rename from paddle/fluid/platform/host_event_recorder.cc rename to paddle/fluid/platform/profiler/host_event_recorder.cc index 750f39118d7d9..14054418c5d24 100644 --- a/paddle/fluid/platform/host_event_recorder.cc +++ 
b/paddle/fluid/platform/profiler/host_event_recorder.cc @@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/host_event_recorder.h" +#include "paddle/fluid/platform/profiler/host_event_recorder.h" #include "paddle/fluid/platform/os_info.h" namespace paddle { @@ -26,7 +26,7 @@ HostEventSection HostEventRecorder::GatherEvents() { for (auto &kv : thread_recorders_) { host_sec.thr_sections.emplace_back(std::move(kv.second->GatherEvents())); } - return std::move(host_sec); + return host_sec; } } // namespace platform diff --git a/paddle/fluid/platform/host_event_recorder.h b/paddle/fluid/platform/profiler/host_event_recorder.h similarity index 84% rename from paddle/fluid/platform/host_event_recorder.h rename to paddle/fluid/platform/profiler/host_event_recorder.h index e8dd59ad4c6f1..071f0d65bd0a6 100644 --- a/paddle/fluid/platform/host_event_recorder.h +++ b/paddle/fluid/platform/profiler/host_event_recorder.h @@ -25,6 +25,40 @@ limitations under the License. 
*/ namespace paddle { namespace platform { +struct CommonEvent { + public: + CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns, + EventRole role) + : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {} + + CommonEvent(std::function &arena_allocator, + const std::string &name_str, uint64_t start_ns, uint64_t end_ns, + EventRole role, const std::string &attr_str) + : start_ns(start_ns), end_ns(end_ns), role(role) { + auto buf = static_cast(arena_allocator(name_str.length() + 1)); + strncpy(buf, name_str.c_str(), name_str.length() + 1); + name = buf; + buf = static_cast(arena_allocator(attr_str.length() + 1)); + strncpy(buf, attr_str.c_str(), attr_str.length() + 1); + attr = buf; + } + + CommonEvent(const std::function &arena_allocator, + const std::string &name_str, uint64_t start_ns, uint64_t end_ns, + EventRole role) + : start_ns(start_ns), end_ns(end_ns), role(role) { + auto buf = static_cast(arena_allocator(name_str.length() + 1)); + strncpy(buf, name_str.c_str(), name_str.length() + 1); + name = buf; + } + + const char *name = nullptr; // not owned, designed for performance + uint64_t start_ns = 0; + uint64_t end_ns = 0; + EventRole role = EventRole::kOrdinary; + const char *attr = nullptr; // not owned, designed for performance +}; + template struct ContainsStdString : std::conditional_t< @@ -154,7 +188,7 @@ std::vector EventContainer::Reduce() { cur = next; } event_blocks_ = cur_event_block_ = new EventBlock; - return std::move(all_events); + return all_events; } template @@ -204,7 +238,7 @@ class ThreadEventRecorder { thr_sec.thread_name = thread_name_; thr_sec.thread_id = thread_id_; thr_sec.events = std::move(base_evt_cntr_.Reduce()); - return std::move(thr_sec); + return thr_sec; } private: diff --git a/paddle/fluid/platform/profiler/trace_event_collector.h b/paddle/fluid/platform/profiler/trace_event_collector.h new file mode 100644 index 0000000000000..eabafb73542dc --- /dev/null +++ 
b/paddle/fluid/platform/profiler/trace_event_collector.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +struct HostRecord { + std::string name; + uint64_t start_ns; + uint64_t end_ns; + uint64_t process_id; + uint64_t thread_id; +}; + +struct RuntimeRecord { + std::string name; + uint64_t start_ns; + uint64_t end_ns; + uint64_t process_id; + uint64_t thread_id; + uint32_t correlation_id; +}; + +struct DeviceRecord { + std::string name; + uint64_t start_ns; + uint64_t end_ns; + uint32_t correlation_id; +}; + +class TraceEventCollector { + public: + void AddHostRecord(HostRecord&& record) { host_records_.push_back(record); } + + void AddRuntimeRecord(RuntimeRecord&& record) { + runtime_records_.push_back(record); + } + + void AddDeviceRecord(DeviceRecord&& record) { + device_records_.push_back(record); + } + + private: + std::list host_records_; + std::list runtime_records_; + std::list device_records_; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/tracer_base.h b/paddle/fluid/platform/profiler/tracer_base.h new file mode 100644 index 0000000000000..1d4e3447fe64e --- /dev/null +++ b/paddle/fluid/platform/profiler/tracer_base.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/fluid/platform/profiler/trace_event_collector.h" + +namespace paddle { +namespace platform { + +class TracerBase { + public: + // The state machine for a Tracer. + enum class TracerState { UNINITED, READY, STARTED, STOPED }; + + virtual void PrepareTracing() { state_ = TracerState::READY; } + + virtual void StartTracing() = 0; + + virtual void StopTracing() = 0; + + virtual void CollectTraceData(TraceEventCollector* collector) = 0; + + virtual ~TracerBase() {} + + protected: + TracerState state_ = TracerState::UNINITED; +}; + +} // namespace platform +} // namespace paddle