From cb8479c34b392af186df28dc99efa5ca28140af6 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 10 Jun 2019 22:16:53 -0700 Subject: [PATCH 01/38] working version --- src/engine/threaded_engine.cc | 6 +- src/engine/threaded_engine.h | 1 + src/operator/custom/custom-inl.h | 6 +- src/operator/custom/custom.cc | 4 +- src/profiler/custom_op_profiler.h | 93 +++++++++++++++++++++++++++++++ src/profiler/profiler.h | 42 ++++++++++++-- 6 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 src/profiler/custom_op_profiler.h diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index 38311908bdcd..62882e116d35 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -333,9 +333,11 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, << device_count_; } #endif - ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, opr_name, wait); - opr->temporary = true; const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); + const char* d_name = profiling ? profiler::CustomOpProfiler::Get()->GetDisplayName(opr_name) : NULL; + const char* display_name = d_name ? d_name : opr_name; + ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); + opr->temporary = true; Push(opr, exec_ctx, priority, profiling); } diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h index c39f322596ae..3d56b7c79b90 100644 --- a/src/engine/threaded_engine.h +++ b/src/engine/threaded_engine.h @@ -43,6 +43,7 @@ #include "../profiler/profiler.h" #include "./openmp.h" #include "../common/object_pool.h" +#include "../profiler/custom_op_profiler.h" namespace mxnet { namespace engine { diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index 3bf63b75cfdb..c2a261d9e560 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -43,6 +43,7 @@ #include #include #include "../operator_common.h" +#include "../../profiler/custom_op_profiler.h" namespace mxnet { namespace op { @@ -76,7 +77,8 @@ class CustomOperator { bool training, const std::vector& arrs, const std::vector& tags, const std::unordered_set& output_tags, - const std::vector& outputs) { + const std::vector& outputs, + const std::string op_type = "") { if (naive_engine_) { func(); for (size_t i = 0, out_idx = 0; i < arrs.size(); i++) { @@ -97,7 +99,9 @@ class CustomOperator { bool prev_training = Imperative::Get()->set_is_training(training); try { + profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); func(); + profiler::CustomOpProfiler::Get()->OnCustomEnd(); } catch (dmlc::Error& e) { exception_ = std::make_shared(std::current_exception()); diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 77fe2e6e4b1c..11607e78ae1b 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -345,7 +345,7 @@ void ForwardEx(const OpStatePtr& state, const OpContext& ctx, static_cast(ctx.is_train), params.info->contexts[kCustomOpForward])); }, - ctx, false, ctx.is_train, cpys, tags, output_tags, outputs); + ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, params.op_type); } void BackwardEx(const OpStatePtr& state, const OpContext& ctx, @@ -415,7 +415,7 @@ void BackwardEx(const OpStatePtr& state, const OpContext& ctx, ptrs.size(), const_cast(ptrs.data()), const_cast(tags.data()), reinterpret_cast(req.data()), static_cast(ctx.is_train), params.info->contexts[kCustomOpBackward])); - }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs); + }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, params.op_type); } // infer storage backward function for custom op which assigns kDefaultStorage for diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h new file mode 100644 index 000000000000..7b590054657c --- /dev/null +++ b/src/profiler/custom_op_profiler.h @@ -0,0 +1,93 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +*/ +#ifndef MXNET_CUSTOM_OP_PROFILER_H_ +#define MXNET_CUSTOM_OP_PROFILER_H_ + +#include +#include +#include +#include +#include "./profiler.h" + +namespace mxnet { +namespace profiler { + +using Tid = std::thread::id; +using TaskPtr = std::shared_ptr; + +class CustomOpProfiler { +public: + static CustomOpProfiler* Get() { + static std::mutex mtx; + static std::shared_ptr prof = nullptr; + if (!prof) { + std::unique_lock lk(mtx); + if (!prof) + prof = std::make_shared(); + } + return prof.get(); + } + + void OnCustomBegin(const std::string& op_type) { + Tid tid = std::this_thread::get_id(); + const std::string task_name = op_type + "::pure_python" ; + std::lock_guard lock(mutex_); + tid_to_op_type_[tid] = op_type; + tasks_[tid] = std::make_shared(task_name.c_str(), &custom_op_domain); + tasks_[tid]->start(); + } + + void OnCustomEnd() { + Tid tid = std::this_thread::get_id(); + std::lock_guard lock(mutex_); + if (tasks_.find(tid) == tasks_.end()) { + LOG(WARNING) << "This should never happen"; + } + tasks_[tid]->stop(); + tasks_.erase(tid); + tid_to_op_type_.erase(tid); + } + + + const char* GetDisplayName(const std::string& op_type) { + Tid tid = std::this_thread::get_id(); + std::lock_guard lock(mutex_); + if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { + return NULL; + } + std::string name = tid_to_op_type_[tid] + "::" + op_type; + display_names_.insert(name); + return display_names_.find(name)->c_str(); + } + +private: + + /*! \brief */ + std::mutex mutex_; + /* !\brief task names for sub-operators in custom ops */ + std::unordered_set display_names_; + /* */ + std::unordered_map tasks_; + /* */ + std::unordered_map tid_to_op_type_; +}; +} // namespace profiler +} // namespace mxnet + +#endif // MXNET_CUSTOM_OP_PROFILER_H_ \ No newline at end of file diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index a6d9ecf06fee..9e0d46638fca 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -790,6 +790,10 @@ struct ProfileTask : public ProfileDuration { NVTX_ONLY_CODE(nvtx_duration_.reset(new nvtx::NVTXDuration(name))); } + void setDomain(ProfileDomain *domain) { + domain_ = domain; + } + /*! * \brief Start the profiling scope */ @@ -1111,6 +1115,11 @@ struct ProfileMarker { VTUNE_ONLY_CODE(std::unique_ptr vtune_instant_marker_); }; + +class CustomOpProfiler; +static ProfileDomain custom_op_domain("Custom Operator"); + + /*! * \brief Operator profiler object. Logs as both an independent event and a task in * the operator domain @@ -1162,10 +1171,27 @@ struct ProfileOperator : public ProfileEvent { : ProfileEvent(name) , as_task_(name, &domain_) , name_(name) +<<<<<<< HEAD , attributes_(attributes) { // make as_task_ not to add stat to AggregateStats; otherwise we will add twice as_task_.enableAggregateStats(false); SetCategories(domain_.name()); +======= + , attributes_(attributes) + , profiling_operator(true) + , profiling_task(true) { + if (strcmp(name, "CustomOperator") == 0) { + profiling_operator = false; + profiling_task = false; + } else if (strcmp(name, "Custom") == 0) { + profiling_task = false; + } else if (std::string(name).find("::") != std::string::npos) { + as_task_.setDomain(&custom_op_domain); + SetCategories(custom_op_domain.name()); + } else { + SetCategories(domain_.name()); + } +>>>>>>> working version } /*! * \brief Start the profiling scope @@ -1175,15 +1201,19 @@ struct ProfileOperator : public ProfileEvent { void start(mxnet::Context::DeviceType dev_type, uint32_t dev_id) { dev_type_ = dev_type; dev_id_ = dev_id; - ProfileEvent::start(); - as_task_.start(); + if (profiling_operator) + ProfileEvent::start(); + if (profiling_task) + as_task_.start(); } /*! * \brief Stop the profiling scope */ void stop() override { - as_task_.stop(); - ProfileEvent::stop(); + if (profiling_task) + as_task_.stop(); + if (profiling_operator) + ProfileEvent::stop(); } /*! @@ -1240,6 +1270,10 @@ struct ProfileOperator : public ProfileEvent { static ProfileDomain domain_; /*! \brief Optional operator attributes */ std::unique_ptr attributes_; + + bool profiling_operator; + + bool profiling_task; }; /* From d549f5ed6970ab2e4682b25e3aaaacfe83add7b7 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 10 Jun 2019 23:05:40 -0700 Subject: [PATCH 02/38] style fix --- src/operator/custom/custom-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index c2a261d9e560..576c4d5f82ee 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -78,7 +78,7 @@ class CustomOperator { const std::vector& tags, const std::unordered_set& output_tags, const std::vector& outputs, - const std::string op_type = "") { + const std::string op_type = "") { if (naive_engine_) { func(); for (size_t i = 0, out_idx = 0; i < arrs.size(); i++) { From e57ebd5314129bf1faf6e4163dca8129216ad2a2 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Wed, 12 Jun 2019 11:57:24 -0700 Subject: [PATCH 03/38] several fixes --- src/engine/threaded_engine.cc | 2 +- src/profiler/custom_op_profiler.h | 64 +++++++++++++++---------------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index 62882e116d35..ca47d66b7a61 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -334,7 +334,7 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, } #endif const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); - const char* d_name = profiling ? profiler::CustomOpProfiler::Get()->GetDisplayName(opr_name) : NULL; + const char* d_name = profiling && opr_name ? profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; const char* display_name = d_name ? d_name : opr_name; ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); opr->temporary = true; diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 7b590054657c..212c0cd91604 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -29,55 +29,53 @@ namespace mxnet { namespace profiler { using Tid = std::thread::id; -using TaskPtr = std::shared_ptr; +using TaskPtr = std::unique_ptr; class CustomOpProfiler { public: static CustomOpProfiler* Get() { - static std::mutex mtx; - static std::shared_ptr prof = nullptr; - if (!prof) { - std::unique_lock lk(mtx); - if (!prof) - prof = std::make_shared(); - } - return prof.get(); + static CustomOpProfiler inst; + return &inst; } void OnCustomBegin(const std::string& op_type) { - Tid tid = std::this_thread::get_id(); - const std::string task_name = op_type + "::pure_python" ; - std::lock_guard lock(mutex_); - tid_to_op_type_[tid] = op_type; - tasks_[tid] = std::make_shared(task_name.c_str(), &custom_op_domain); - tasks_[tid]->start(); + const Tid tid = std::this_thread::get_id(); + const std::string task_name = op_type + "::pure_python" ; + std::lock_guard lock(mutex_); + tid_to_op_type_[tid] = op_type; + tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); + tasks_[tid]->start(); } void OnCustomEnd() { - Tid tid = std::this_thread::get_id(); - std::lock_guard lock(mutex_); - if (tasks_.find(tid) == tasks_.end()) { - LOG(WARNING) << "This should never happen"; - } - tasks_[tid]->stop(); - tasks_.erase(tid); - tid_to_op_type_.erase(tid); + const Tid tid = std::this_thread::get_id(); + std::lock_guard lock(mutex_); + CHECK(tasks_.find(tid) != tasks_.end()); + tasks_[tid]->stop(); + tasks_.erase(tid); + tid_to_op_type_.erase(tid); } - const char* GetDisplayName(const std::string& op_type) { - Tid tid = std::this_thread::get_id(); - std::lock_guard lock(mutex_); - if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { - return NULL; - } - std::string name = tid_to_op_type_[tid] + "::" + op_type; - display_names_.insert(name); - return display_names_.find(name)->c_str(); + const char* GenerateDisplayName(const char* op_type_ptr) { + if (!op_type_ptr) { + return NULL; + } + Tid tid = std::this_thread::get_id(); + std::lock_guard lock(mutex_); + if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { + return NULL; + } + std::string op_type = std::string(op_type_ptr); + std::string name = tid_to_op_type_[tid] + "::" + op_type; + display_names_.insert(name); + return display_names_.find(name)->c_str(); } -private: +protected: + CustomOpProfiler(){}; +private: /*! \brief */ std::mutex mutex_; /* !\brief task names for sub-operators in custom ops */ From ed1794e395cad8c956d196552bf84d35d6df6b7e Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 13 Jun 2019 22:46:11 -0700 Subject: [PATCH 04/38] resolve issues in the comments --- src/engine/threaded_engine.cc | 8 +++ src/operator/custom/custom-inl.h | 2 +- src/profiler/custom_op_profiler.h | 113 ++++++++++++++++++------------ src/profiler/profiler.h | 38 +++++----- 4 files changed, 96 insertions(+), 65 deletions(-) diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index ca47d66b7a61..ef6e52c7135e 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -334,7 +334,15 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, } #endif const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); + // auto start = std::chrono::duration_cast( + // std::chrono::high_resolution_clock::now().time_since_epoch()).count(); const char* d_name = profiling && opr_name ? profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; + // auto end = std::chrono::duration_cast( + // std::chrono::high_resolution_clock::now().time_since_epoch()).count(); + // LOG(WARNING) << opr_name; + // LOG(WARNING) << start; + // LOG(WARNING) << end; + // LOG(WARNING) << static_cast(end-start) / 1000; const char* display_name = d_name ? d_name : opr_name; ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); opr->temporary = true; diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index 576c4d5f82ee..4616946dac44 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -148,7 +148,7 @@ class CustomOperator { ctx.async_on_complete(); }, ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0, - "CustomOperator"); + "Dummy_Wait"); }); // increase num_threads if there is not enough threads to execute custom operator if (q_.size() > num_free_threads_) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 212c0cd91604..ff81b6f6bd57 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -31,59 +31,80 @@ namespace profiler { using Tid = std::thread::id; using TaskPtr = std::unique_ptr; + /*! + * \brief Singleton class to assist profiling python callback of custom operators + * and to assist linking sub-operators to custom operators + */ class CustomOpProfiler { -public: - static CustomOpProfiler* Get() { - static CustomOpProfiler inst; - return &inst; - } + public: - void OnCustomBegin(const std::string& op_type) { - const Tid tid = std::this_thread::get_id(); - const std::string task_name = op_type + "::pure_python" ; - std::lock_guard lock(mutex_); - tid_to_op_type_[tid] = op_type; - tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); - tasks_[tid]->start(); - } + static CustomOpProfiler* Get() { + static CustomOpProfiler inst; + return &inst; + } + /*! + * \brief Called before the callback of custom operators to start a profile task for python + * code execution time + * \param op_type The registed name of the custom operator + */ + void OnCustomBegin(const std::string& op_type) { + const Tid tid = std::this_thread::get_id(); + const std::string task_name = op_type + "::pure_python" ; + std::lock_guard lock(mutex_); + tid_to_op_type_[tid] = op_type; + tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); + tasks_[tid]->start(); + } - void OnCustomEnd() { - const Tid tid = std::this_thread::get_id(); - std::lock_guard lock(mutex_); - CHECK(tasks_.find(tid) != tasks_.end()); - tasks_[tid]->stop(); - tasks_.erase(tid); - tid_to_op_type_.erase(tid); - } + /*! + * \brief Called after the callback of custom operators to stop the profile task for python + * code execution time + */ + void OnCustomEnd() { + const Tid tid = std::this_thread::get_id(); + std::lock_guard lock(mutex_); + CHECK(tasks_.find(tid) != tasks_.end()); + tasks_[tid]->stop(); + tasks_.erase(tid); + tid_to_op_type_.erase(tid); + } - - const char* GenerateDisplayName(const char* op_type_ptr) { - if (!op_type_ptr) { - return NULL; - } - Tid tid = std::this_thread::get_id(); - std::lock_guard lock(mutex_); - if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { - return NULL; - } - std::string op_type = std::string(op_type_ptr); - std::string name = tid_to_op_type_[tid] + "::" + op_type; - display_names_.insert(name); - return display_names_.find(name)->c_str(); + /*! + * \brief Generate a display name for sub-operators, which is the name used for OprBlock + * and later by profiler, and store it in a unordered_set so that it can be referenced + * in the future + * \param op_type_ptr The registed name of the operator + * \return Returns a pointer to the display name generated + */ + const char* GenerateDisplayName(const char* op_type_ptr) { + if (!op_type_ptr) { + return NULL; + } + Tid tid = std::this_thread::get_id(); + std::lock_guard lock(mutex_); + if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { + return NULL; } + std::string op_type = std::string(op_type_ptr); + std::string name = tid_to_op_type_[tid] + "::" + op_type; + display_names_.insert(name); + return display_names_.find(name)->c_str(); + } -protected: - CustomOpProfiler(){}; + protected: + CustomOpProfiler(){}; -private: - /*! \brief */ - std::mutex mutex_; - /* !\brief task names for sub-operators in custom ops */ - std::unordered_set display_names_; - /* */ - std::unordered_map tasks_; - /* */ - std::unordered_map tid_to_op_type_; + private: + /*! \brief class mutex*/ + std::mutex mutex_; + /* !\brief display names for sub-operators in custom ops */ + std::unordered_set display_names_; + /* !\brief profiling tasks for pure python code in custom operators*/ + std::unordered_map tasks_; + /* !\brief the maping from thread id to the registered name op the custom operator + * that is runnin on that thread + */ + std::unordered_map tid_to_op_type_; }; } // namespace profiler } // namespace mxnet diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 9e0d46638fca..080b31f272ac 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1115,11 +1115,8 @@ struct ProfileMarker { VTUNE_ONLY_CODE(std::unique_ptr vtune_instant_marker_); }; - -class CustomOpProfiler; static ProfileDomain custom_op_domain("Custom Operator"); - /*! * \brief Operator profiler object. Logs as both an independent event and a task in * the operator domain @@ -1178,13 +1175,13 @@ struct ProfileOperator : public ProfileEvent { SetCategories(domain_.name()); ======= , attributes_(attributes) - , profiling_operator(true) - , profiling_task(true) { - if (strcmp(name, "CustomOperator") == 0) { - profiling_operator = false; - profiling_task = false; + , profiling_operator_(true) + , profiling_task_(true) { + if (strcmp(name, "Dummy_Wait") == 0) { + profiling_operator_ = false; + profiling_task_ = false; } else if (strcmp(name, "Custom") == 0) { - profiling_task = false; + profiling_task_ = false; } else if (std::string(name).find("::") != std::string::npos) { as_task_.setDomain(&custom_op_domain); SetCategories(custom_op_domain.name()); @@ -1201,18 +1198,18 @@ struct ProfileOperator : public ProfileEvent { void start(mxnet::Context::DeviceType dev_type, uint32_t dev_id) { dev_type_ = dev_type; dev_id_ = dev_id; - if (profiling_operator) + if (profiling_operator_) ProfileEvent::start(); - if (profiling_task) + if (profiling_task_) as_task_.start(); } /*! * \brief Stop the profiling scope */ void stop() override { - if (profiling_task) + if (profiling_task_) as_task_.stop(); - if (profiling_operator) + if (profiling_operator_) ProfileEvent::stop(); } @@ -1238,7 +1235,12 @@ struct ProfileOperator : public ProfileEvent { if (attributes) { name_.append(attributes->to_string().c_str()); } - categories_.set("operator"); + if (std::string(name).find("::") != std::string::npos) { + categories_.set(custom_op_domain.name()); + } + else { + categories_.set("operator"); + } items_[kStart].timestamp_ = start_time; items_[kStop].timestamp_ = stop_time; } @@ -1270,10 +1272,10 @@ struct ProfileOperator : public ProfileEvent { static ProfileDomain domain_; /*! \brief Optional operator attributes */ std::unique_ptr attributes_; - - bool profiling_operator; - - bool profiling_task; + /*! \brief Whether to profile as operator */ + bool profiling_operator_; + /*! \brief Whether to profile as task */ + bool profiling_task_; }; /* From 5cf59fd48f8fd2f1c902e0f9f05bd253f86337ad Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 13 Jun 2019 23:16:33 -0700 Subject: [PATCH 05/38] revert to using thread-safe Get() for singleton class CustomOpProfiler --- src/profiler/custom_op_profiler.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index ff81b6f6bd57..0beee51169f6 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -37,10 +37,15 @@ using TaskPtr = std::unique_ptr; */ class CustomOpProfiler { public: - static CustomOpProfiler* Get() { - static CustomOpProfiler inst; - return &inst; + static std::mutex mtx; + static std::unique_ptr prof = nullptr; + if (!prof) { + std::unique_lock lk(mtx); + if (!prof) + prof = std::make_unique(); + } + return prof.get(); } /*! * \brief Called before the callback of custom operators to start a profile task for python @@ -92,7 +97,7 @@ class CustomOpProfiler { } protected: - CustomOpProfiler(){}; + // CustomOpProfiler(){} private: /*! \brief class mutex*/ From b9126a7810c6a60bb0742c0ed93fce23c9eb2d35 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Fri, 14 Jun 2019 10:36:56 -0700 Subject: [PATCH 06/38] indentation --- src/engine/threaded_engine.cc | 8 ++++---- src/profiler/custom_op_profiler.h | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index ef6e52c7135e..abf7a9a5d97f 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -336,16 +336,16 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); // auto start = std::chrono::duration_cast( // std::chrono::high_resolution_clock::now().time_since_epoch()).count(); - const char* d_name = profiling && opr_name ? profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; + const char* d_name = profiling && opr_name ? profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; // auto end = std::chrono::duration_cast( // std::chrono::high_resolution_clock::now().time_since_epoch()).count(); // LOG(WARNING) << opr_name; // LOG(WARNING) << start; // LOG(WARNING) << end; // LOG(WARNING) << static_cast(end-start) / 1000; - const char* display_name = d_name ? d_name : opr_name; - ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); - opr->temporary = true; + const char* display_name = d_name ? d_name : opr_name; + ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); + opr->temporary = true; Push(opr, exec_ctx, priority, profiling); } diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 0beee51169f6..fc386a6b4fbd 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -96,9 +96,6 @@ class CustomOpProfiler { return display_names_.find(name)->c_str(); } - protected: - // CustomOpProfiler(){} - private: /*! \brief class mutex*/ std::mutex mutex_; From ed9384975236fecbec596efd14ea2bb3770b4865 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 17 Jun 2019 11:34:39 -0700 Subject: [PATCH 07/38] Now supports Naive Engine --- src/engine/naive_engine.cc | 6 +++++- src/engine/threaded_engine.cc | 17 ++++++----------- src/operator/custom/custom-inl.h | 2 ++ src/profiler/custom_op_profiler.h | 6 +++--- src/profiler/profiler.h | 3 +-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index c321e48d562d..aab4b3c3db2c 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -29,6 +29,7 @@ #include "../profiler/profiler.h" #include "./openmp.h" #include "../common/object_pool.h" +#include "../profiler/custom_op_profiler.h" namespace mxnet { namespace engine { @@ -160,6 +161,9 @@ class NaiveEngine final : public Engine { profiler::Profiler *profiler = profiler::Profiler::Get(); NaiveOpr *opr = nullptr; const bool profiling = opr_name && profiler->IsProfiling(profiler::Profiler::kImperative); + const char* d_name = profiling && opr_name ? + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; + const char* display_name = (d_name ? d_name : opr_name); if (profiling) { opr = NewOperator(exec_fun, const_vars, mutable_vars, prop, opr_name)->Cast(); @@ -168,7 +172,7 @@ class NaiveEngine final : public Engine { if (profiler->AggregateEnabled()) { attrs.reset(new profiler::ProfileOperator::Attributes()); } - opr->opr_profile.reset(new profiler::ProfileOperator(opr->opr_name, attrs.release())); + opr->opr_profile.reset(new profiler::ProfileOperator(display_name, attrs.release())); opr->opr_profile->start(exec_ctx.dev_type, exec_ctx.dev_id); } if (exec_ctx.dev_mask() == gpu::kDevMask) { diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index abf7a9a5d97f..ef09171dbe18 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -334,17 +334,12 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, } #endif const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); - // auto start = std::chrono::duration_cast( - // std::chrono::high_resolution_clock::now().time_since_epoch()).count(); - const char* d_name = profiling && opr_name ? profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; - // auto end = std::chrono::duration_cast( - // std::chrono::high_resolution_clock::now().time_since_epoch()).count(); - // LOG(WARNING) << opr_name; - // LOG(WARNING) << start; - // LOG(WARNING) << end; - // LOG(WARNING) << static_cast(end-start) / 1000; - const char* display_name = d_name ? d_name : opr_name; - ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); + // Try to get a new name. Only not null for sub-operators of a custom operator + const char* d_name = profiling && opr_name ? + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; + const char* display_name = (d_name ? d_name : opr_name); + ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, + prop, display_name, wait); opr->temporary = true; Push(opr, exec_ctx, priority, profiling); } diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index 4616946dac44..e84e58ec3c18 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -80,7 +80,9 @@ class CustomOperator { const std::vector& outputs, const std::string op_type = "") { if (naive_engine_) { + profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); func(); + profiler::CustomOpProfiler::Get()->OnCustomEnd(); for (size_t i = 0, out_idx = 0; i < arrs.size(); i++) { if (arrs[i].storage_type() == kDefaultStorage || arrs[i].storage_type() == kUndefinedStorage) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index fc386a6b4fbd..ace674b5c0ba 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -16,8 +16,8 @@ * specific language governing permissions and limitations * under the License. */ -#ifndef MXNET_CUSTOM_OP_PROFILER_H_ -#define MXNET_CUSTOM_OP_PROFILER_H_ +#ifndef MXNET_PROFILER_CUSTOM_OP_PROFILER_H_ +#define MXNET_PROFILER_CUSTOM_OP_PROFILER_H_ #include #include @@ -111,4 +111,4 @@ class CustomOpProfiler { } // namespace profiler } // namespace mxnet -#endif // MXNET_CUSTOM_OP_PROFILER_H_ \ No newline at end of file +#endif // MXNET_CUSTOM_OP_PROFILER_H_ diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 080b31f272ac..71b551a2e139 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1237,8 +1237,7 @@ struct ProfileOperator : public ProfileEvent { } if (std::string(name).find("::") != std::string::npos) { categories_.set(custom_op_domain.name()); - } - else { + } else { categories_.set("operator"); } items_[kStart].timestamp_ = start_time; From cb21b2b43cbffd4df1b227af8db417c2895acfd9 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 17 Jun 2019 11:56:42 -0700 Subject: [PATCH 08/38] style fix --- src/profiler/custom_op_profiler.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index ace674b5c0ba..8685475ea072 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -54,7 +54,7 @@ class CustomOpProfiler { */ void OnCustomBegin(const std::string& op_type) { const Tid tid = std::this_thread::get_id(); - const std::string task_name = op_type + "::pure_python" ; + const std::string task_name = op_type + "::pure_python"; std::lock_guard lock(mutex_); tid_to_op_type_[tid] = op_type; tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); @@ -85,7 +85,7 @@ class CustomOpProfiler { if (!op_type_ptr) { return NULL; } - Tid tid = std::this_thread::get_id(); + Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { return NULL; @@ -107,8 +107,8 @@ class CustomOpProfiler { * that is runnin on that thread */ std::unordered_map tid_to_op_type_; -}; +}; } // namespace profiler } // namespace mxnet -#endif // MXNET_CUSTOM_OP_PROFILER_H_ +#endif // MXNET_PROFILER_CUSTOM_OP_PROFILER_H_ From e21f18e5bbfbfce9d5bc3815df73371ce3ad6452 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 17 Jun 2019 13:13:01 -0700 Subject: [PATCH 09/38] tidiness --- src/engine/naive_engine.cc | 3 ++- src/engine/threaded_engine.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index aab4b3c3db2c..43557556ea24 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -160,9 +160,10 @@ class NaiveEngine final : public Engine { this->req_completed_ = false; profiler::Profiler *profiler = profiler::Profiler::Get(); NaiveOpr *opr = nullptr; + // Try to get a new name. Only not null for sub-operators of a custom operator const bool profiling = opr_name && profiler->IsProfiling(profiler::Profiler::kImperative); const char* d_name = profiling && opr_name ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : nullptr; const char* display_name = (d_name ? d_name : opr_name); if (profiling) { opr = NewOperator(exec_fun, const_vars, mutable_vars, diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index ef09171dbe18..0e12dd9b186e 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -336,7 +336,7 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); // Try to get a new name. Only not null for sub-operators of a custom operator const char* d_name = profiling && opr_name ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : NULL; + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : nullptr; const char* display_name = (d_name ? d_name : opr_name); ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); From c9081198235bc5b2344c4cb5e64bafbd20c200ba Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 17 Jun 2019 14:48:15 -0700 Subject: [PATCH 10/38] tests added --- src/operator/custom/custom.cc | 4 +- tests/python/unittest/test_profiler.py | 56 ++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 11607e78ae1b..20cae6a90d9a 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -345,7 +345,7 @@ void ForwardEx(const OpStatePtr& state, const OpContext& ctx, static_cast(ctx.is_train), params.info->contexts[kCustomOpForward])); }, - ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, params.op_type); + ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, std::string(params.op_type)); } void BackwardEx(const OpStatePtr& state, const OpContext& ctx, @@ -415,7 +415,7 @@ void BackwardEx(const OpStatePtr& state, const OpContext& ctx, ptrs.size(), const_cast(ptrs.data()), const_cast(tags.data()), reinterpret_cast(req.data()), static_cast(ctx.is_train), params.info->contexts[kCustomOpBackward])); - }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, params.op_type); + }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, "_backward_" + std::string(params.op_type)); } // infer storage backward function for custom op which assigns kDefaultStorage for diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index b76bfbc82d12..fcb945bc6fda 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -269,6 +269,7 @@ def check_sorting(debug_str, sort_by, ascending): check_sorting(debug_str, sb, asc) profiler.set_state('stop') +<<<<<<< HEAD def test_aggregate_duplication(): file_name = 'test_aggregate_duplication.json' enable_profiler(profile_filename = file_name, run=True, continuous_dump=True, \ @@ -291,6 +292,61 @@ def test_aggregate_duplication(): assert target_dict['Time']['operator']['_plus_scalar']['Count'] == 2 profiler.set_state('stop') +======= +def test_custom_operator_profiling(): + class Sigmoid(mx.operator.CustomOp): + def forward(self, is_train, req, in_data, out_data, aux): + x = in_data[0].asnumpy() + import numpy as np + y = 1.0 / (1.0 + np.exp(-x)) + self.assign(out_data[0], req[0], mx.nd.array(y)) + # Create a dummy matrix using nd.zeros. Test if 'MySigmoid::_zeros' is in dump file + dummy = mx.nd.zeros(shape=(100, 100)) + + def backward(self, req, out_grad, in_data, out_data, in_grad, aux): + y = out_data[0].asnumpy() + dy = out_grad[0].asnumpy() + dx = dy*(1.0 - y)*y + self.assign(in_grad[0], req[0], mx.nd.array(dx)) + + @mx.operator.register("MySigmoid") + class SigmoidProp(mx.operator.CustomOpProp): + def __init__(self): + super(SigmoidProp, self).__init__(True) + + def list_arguments(self): + return ['data'] + + def list_outputs(self): + return ['output'] + + def infer_shape(self, in_shapes): + data_shape = in_shapes[0] + output_shape = data_shape + return (data_shape,), (output_shape,), () + + def create_operator(self, ctx, in_shapes, in_dtypes): + return Sigmoid() + + file_name = 'test_custom_operator_domain.json' + enable_profiler(file_name, True, True, True) + x = mx.nd.array([0, 1, 2, 3]) + x.attach_grad() + with mx.autograd.record(): + y = mx.nd.Custom(x, op_type='MySigmoid') + y.backward() + mx.nd.waitall() + profiler.dump() + debug_str = profiler.dumps(format = 'json') + target_dict = json.loads(debug_str) + print(target_dict) + assert "Time" in target_dict and "Custom Operator" in target_dict["Time"] \ + and "MySigmoid::pure_python" in target_dict["Time"]["Custom Operator"] \ + and "_backward_MySigmoid::pure_python" in target_dict["Time"]["Custom Operator"] \ + and "MySigmoid::_zeros" in target_dict["Time"]["Custom Operator"] + profiler.set_state('stop') + +>>>>>>> tests added if __name__ == '__main__': import nose nose.runmodule() From 51b862a15c82e8feac72346b617e2191702ca69a Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 17 Jun 2019 15:06:18 -0700 Subject: [PATCH 11/38] style fix --- src/operator/custom/custom.cc | 3 ++- tests/python/unittest/test_profiler.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 20cae6a90d9a..1290c9f68e64 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -415,7 +415,8 @@ void BackwardEx(const OpStatePtr& state, const OpContext& ctx, ptrs.size(), const_cast(ptrs.data()), const_cast(tags.data()), reinterpret_cast(req.data()), static_cast(ctx.is_train), params.info->contexts[kCustomOpBackward])); - }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, "_backward_" + std::string(params.op_type)); + }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, + "_backward_" + std::string(params.op_type)); } // infer storage backward function for custom op which assigns kDefaultStorage for diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index fcb945bc6fda..32fa06aee601 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -339,7 +339,6 @@ def create_operator(self, ctx, in_shapes, in_dtypes): profiler.dump() debug_str = profiler.dumps(format = 'json') target_dict = json.loads(debug_str) - print(target_dict) assert "Time" in target_dict and "Custom Operator" in target_dict["Time"] \ and "MySigmoid::pure_python" in target_dict["Time"]["Custom Operator"] \ and "_backward_MySigmoid::pure_python" in target_dict["Time"]["Custom Operator"] \ From 36fcf62b9a79e80a995b15785a48d0ae203af227 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 17 Jun 2019 16:38:24 -0700 Subject: [PATCH 12/38] add a new test case which has multiple custom ops --- tests/python/unittest/test_profiler.py | 74 ++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 32fa06aee601..d6ea64f0a138 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -269,7 +269,6 @@ def check_sorting(debug_str, sort_by, ascending): check_sorting(debug_str, sb, asc) profiler.set_state('stop') -<<<<<<< HEAD def test_aggregate_duplication(): file_name = 'test_aggregate_duplication.json' enable_profiler(profile_filename = file_name, run=True, continuous_dump=True, \ @@ -292,7 +291,6 @@ def test_aggregate_duplication(): assert target_dict['Time']['operator']['_plus_scalar']['Count'] == 2 profiler.set_state('stop') -======= def test_custom_operator_profiling(): class Sigmoid(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): @@ -328,7 +326,7 @@ def infer_shape(self, in_shapes): def create_operator(self, ctx, in_shapes, in_dtypes): return Sigmoid() - file_name = 'test_custom_operator_domain.json' + file_name = 'test_custom_operator_profiling.json' enable_profiler(file_name, True, True, True) x = mx.nd.array([0, 1, 2, 3]) x.attach_grad() @@ -345,7 +343,75 @@ def create_operator(self, ctx, in_shapes, in_dtypes): and "MySigmoid::_zeros" in target_dict["Time"]["Custom Operator"] profiler.set_state('stop') ->>>>>>> tests added +def test_custom_operator_profiling_multiple_custom_ops(): + class MyAdd(mx.operator.CustomOp): + def forward(self, is_train, req, in_data, out_data, aux): + self.assign(out_data[0], req[0], in_data[0] + 1) + + def backward(self, req, out_grad, in_data, out_data, in_grad, aux): + self.assign(in_grad[0], req[0], out_grad[0]) + + @mx.operator.register("MyAdd1") + class MyAdd1Prop(mx.operator.CustomOpProp): + def __init__(self): + super(MyAdd1Prop, self).__init__(need_top_grad=True) + + def list_arguments(self): + return ['in'] + + def list_outputs(self): + return ['output'] + + def infer_shape(self, in_shape): + # inputs, outputs, aux + return [in_shape[0]], [in_shape[0]], [] + + def create_operator(self, ctx, shapes, dtypes): + return MyAdd() + + @mx.operator.register("MyAdd2") + class MyAdd2Prop(mx.operator.CustomOpProp): + def __init__(self): + super(MyAdd2Prop, self).__init__(need_top_grad=True) + + def list_arguments(self): + return ['in'] + + def list_outputs(self): + return ['output'] + + def infer_shape(self, in_shape): + # inputs, outputs, aux + return [in_shape[0]], [in_shape[0]], [] + + def create_operator(self, ctx, shapes, dtypes): + return MyAdd() + + file_name = 'test_custom_operator_profiling_multiple_custom_ops.json' + enable_profiler(file_name, True, True, True) + inp = mx.nd.zeros(shape=(100, 100)) + x = inp + 1 + y = mx.nd.Custom(inp, op_type="MyAdd1") + z = mx.nd.Custom(inp, op_type="MyAdd2") + mx.nd.waitall() + profiler.dump() + debug_str = profiler.dumps(format = 'json') + target_dict = json.loads(debug_str) + ''' + We are calling _plus_scalar within MyAdd1 and MyAdd2 and outside both the custom + operators, so in aggregate stats we should have three different kinds of + _plus_scalar under domains "Custom Operator" and "operator" + ''' + assert "Time" in target_dict and "Custom Operator" in target_dict["Time"] \ + and "MyAdd1::pure_python" in target_dict["Time"]["Custom Operator"] \ + and "MyAdd2::pure_python" in target_dict["Time"]["Custom Operator"] \ + and "MyAdd1::_plus_scalar" in target_dict["Time"]["Custom Operator"] \ + and "MyAdd2::_plus_scalar" in target_dict["Time"]["Custom Operator"] \ + and "_plus_scalar" not in target_dict["Time"]["Custom Operator"] \ + and "operator" in target_dict["Time"] \ + and "_plus_scalar" in target_dict["Time"]["operator"] + profiler.set_state('stop') + if __name__ == '__main__': import nose nose.runmodule() From e553a3855486c996af52039cbc91fd707ca72d62 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 18 Jun 2019 11:44:37 -0700 Subject: [PATCH 13/38] testcases fix --- tests/python/unittest/test_profiler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index d6ea64f0a138..1e3aa76e9757 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -327,7 +327,8 @@ def create_operator(self, ctx, in_shapes, in_dtypes): return Sigmoid() file_name = 'test_custom_operator_profiling.json' - enable_profiler(file_name, True, True, True) + enable_profiler(profile_filename = file_name, run=True, continuous_dump=True,\ + aggregate_stats=True) x = mx.nd.array([0, 1, 2, 3]) x.attach_grad() with mx.autograd.record(): @@ -388,7 +389,8 @@ def create_operator(self, ctx, shapes, dtypes): return MyAdd() file_name = 'test_custom_operator_profiling_multiple_custom_ops.json' - enable_profiler(file_name, True, True, True) + enable_profiler(profile_filename = file_name, run=True, continuous_dump=True,\ + aggregate_stats=True) inp = mx.nd.zeros(shape=(100, 100)) x = inp + 1 y = mx.nd.Custom(inp, op_type="MyAdd1") From 9f1275df7b2aa7e7f1fe41420a01228fc94f26b7 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 18 Jun 2019 11:57:28 -0700 Subject: [PATCH 14/38] fix --- tests/python/unittest/test_profiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 1e3aa76e9757..8476f27e96ca 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -335,7 +335,7 @@ def create_operator(self, ctx, in_shapes, in_dtypes): y = mx.nd.Custom(x, op_type='MySigmoid') y.backward() mx.nd.waitall() - profiler.dump() + profiler.dump(False) debug_str = profiler.dumps(format = 'json') target_dict = json.loads(debug_str) assert "Time" in target_dict and "Custom Operator" in target_dict["Time"] \ @@ -396,7 +396,7 @@ def create_operator(self, ctx, shapes, dtypes): y = mx.nd.Custom(inp, op_type="MyAdd1") z = mx.nd.Custom(inp, op_type="MyAdd2") mx.nd.waitall() - profiler.dump() + profiler.dump(False) debug_str = profiler.dumps(format = 'json') target_dict = json.loads(debug_str) ''' From 0fd48f5029b753395ced11c40de88488019a9cce Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 18 Jun 2019 15:09:46 -0700 Subject: [PATCH 15/38] fix style --- src/operator/custom/custom.cc | 5 ++-- tests/python/unittest/test_profiler.py | 34 +++++++++++++------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 1290c9f68e64..5d2c284be46f 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -345,7 +345,7 @@ void ForwardEx(const OpStatePtr& state, const OpContext& ctx, static_cast(ctx.is_train), params.info->contexts[kCustomOpForward])); }, - ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, std::string(params.op_type)); + ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, params.op_type); } void BackwardEx(const OpStatePtr& state, const OpContext& ctx, @@ -415,8 +415,7 @@ void BackwardEx(const OpStatePtr& state, const OpContext& ctx, ptrs.size(), const_cast(ptrs.data()), const_cast(tags.data()), reinterpret_cast(req.data()), static_cast(ctx.is_train), params.info->contexts[kCustomOpBackward])); - }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, - "_backward_" + std::string(params.op_type)); + }, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, "_backward_" + params.op_type); } // infer storage backward function for custom op which assigns kDefaultStorage for diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 8476f27e96ca..91d5b739dffd 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -307,7 +307,7 @@ def backward(self, req, out_grad, in_data, out_data, in_grad, aux): dx = dy*(1.0 - y)*y self.assign(in_grad[0], req[0], mx.nd.array(dx)) - @mx.operator.register("MySigmoid") + @mx.operator.register('MySigmoid') class SigmoidProp(mx.operator.CustomOpProp): def __init__(self): super(SigmoidProp, self).__init__(True) @@ -338,10 +338,10 @@ def create_operator(self, ctx, in_shapes, in_dtypes): profiler.dump(False) debug_str = profiler.dumps(format = 'json') target_dict = json.loads(debug_str) - assert "Time" in target_dict and "Custom Operator" in target_dict["Time"] \ - and "MySigmoid::pure_python" in target_dict["Time"]["Custom Operator"] \ - and "_backward_MySigmoid::pure_python" in target_dict["Time"]["Custom Operator"] \ - and "MySigmoid::_zeros" in target_dict["Time"]["Custom Operator"] + assert 'Time' in target_dict and 'Custom Operator' in target_dict['Time'] \ + and 'MySigmoid::pure_python' in target_dict['Time']['Custom Operator'] \ + and '_backward_MySigmoid::pure_python' in target_dict['Time']['Custom Operator'] \ + and 'MySigmoid::_zeros' in target_dict['Time']['Custom Operator'] profiler.set_state('stop') def test_custom_operator_profiling_multiple_custom_ops(): @@ -352,7 +352,7 @@ def forward(self, is_train, req, in_data, out_data, aux): def backward(self, req, out_grad, in_data, out_data, in_grad, aux): self.assign(in_grad[0], req[0], out_grad[0]) - @mx.operator.register("MyAdd1") + @mx.operator.register('MyAdd1') class MyAdd1Prop(mx.operator.CustomOpProp): def __init__(self): super(MyAdd1Prop, self).__init__(need_top_grad=True) @@ -370,7 +370,7 @@ def infer_shape(self, in_shape): def create_operator(self, ctx, shapes, dtypes): return MyAdd() - @mx.operator.register("MyAdd2") + @mx.operator.register('MyAdd2') class MyAdd2Prop(mx.operator.CustomOpProp): def __init__(self): super(MyAdd2Prop, self).__init__(need_top_grad=True) @@ -393,8 +393,8 @@ def create_operator(self, ctx, shapes, dtypes): aggregate_stats=True) inp = mx.nd.zeros(shape=(100, 100)) x = inp + 1 - y = mx.nd.Custom(inp, op_type="MyAdd1") - z = mx.nd.Custom(inp, op_type="MyAdd2") + y = mx.nd.Custom(inp, op_type='MyAdd1') + z = mx.nd.Custom(inp, op_type='MyAdd2') mx.nd.waitall() profiler.dump(False) debug_str = profiler.dumps(format = 'json') @@ -404,14 +404,14 @@ def create_operator(self, ctx, shapes, dtypes): operators, so in aggregate stats we should have three different kinds of _plus_scalar under domains "Custom Operator" and "operator" ''' - assert "Time" in target_dict and "Custom Operator" in target_dict["Time"] \ - and "MyAdd1::pure_python" in target_dict["Time"]["Custom Operator"] \ - and "MyAdd2::pure_python" in target_dict["Time"]["Custom Operator"] \ - and "MyAdd1::_plus_scalar" in target_dict["Time"]["Custom Operator"] \ - and "MyAdd2::_plus_scalar" in target_dict["Time"]["Custom Operator"] \ - and "_plus_scalar" not in target_dict["Time"]["Custom Operator"] \ - and "operator" in target_dict["Time"] \ - and "_plus_scalar" in target_dict["Time"]["operator"] + assert 'Time' in target_dict and 'Custom Operator' in target_dict['Time'] \ + and 'MyAdd1::pure_python' in target_dict['Time']['Custom Operator'] \ + and 'MyAdd2::pure_python' in target_dict['Time']['Custom Operator'] \ + and 'MyAdd1::_plus_scalar' in target_dict['Time']['Custom Operator'] \ + and 'MyAdd2::_plus_scalar' in target_dict['Time']['Custom Operator'] \ + and '_plus_scalar' not in target_dict['Time']['Custom Operator'] \ + and 'operator' in target_dict['Time'] \ + and '_plus_scalar' in target_dict['Time']['operator'] profiler.set_state('stop') if __name__ == '__main__': From 9820bac2b90e1d8fa1282d59b840b4f897b5b4e0 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 18 Jun 2019 17:06:29 -0700 Subject: [PATCH 16/38] minor naive engine fix --- src/engine/naive_engine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index 43557556ea24..af04917e7334 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -167,7 +167,7 @@ class NaiveEngine final : public Engine { const char* display_name = (d_name ? d_name : opr_name); if (profiling) { opr = NewOperator(exec_fun, const_vars, mutable_vars, - prop, opr_name)->Cast(); + prop, display_name)->Cast(); opr->profiling = profiling; std::unique_ptr attrs; if (profiler->AggregateEnabled()) { From 89de9f40dda45d90179d5841d8dfd23596bdef1e Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Wed, 19 Jun 2019 10:56:26 -0700 Subject: [PATCH 17/38] simplify some branching logic --- src/engine/naive_engine.cc | 7 ++++--- src/engine/threaded_engine.cc | 8 ++++---- src/profiler/custom_op_profiler.h | 7 ++++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index af04917e7334..30a37c73ffe3 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -162,9 +162,10 @@ class NaiveEngine final : public Engine { NaiveOpr *opr = nullptr; // Try to get a new name. Only not null for sub-operators of a custom operator const bool profiling = opr_name && profiler->IsProfiling(profiler::Profiler::kImperative); - const char* d_name = profiling && opr_name ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : nullptr; - const char* display_name = (d_name ? d_name : opr_name); + // GenerateDisplayName() will return a pointer to the correct name of the operator + const char* display_name = profiling ? + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) + : opr_name; if (profiling) { opr = NewOperator(exec_fun, const_vars, mutable_vars, prop, display_name)->Cast(); diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index 0e12dd9b186e..48ab7d5cff80 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -334,10 +334,10 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, } #endif const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); - // Try to get a new name. Only not null for sub-operators of a custom operator - const char* d_name = profiling && opr_name ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : nullptr; - const char* display_name = (d_name ? d_name : opr_name); + // GenerateDisplayName() will return a pointer to the correct name of the operator + const char* display_name = profiling ? + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) + : opr_name; ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); opr->temporary = true; diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 8685475ea072..6b0b9dd8cfe2 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -77,18 +77,19 @@ class CustomOpProfiler { /*! * \brief Generate a display name for sub-operators, which is the name used for OprBlock * and later by profiler, and store it in a unordered_set so that it can be referenced - * in the future + * in the future. + * Notice if the operator is not a sub-operator, just return the char pointer back. * \param op_type_ptr The registed name of the operator * \return Returns a pointer to the display name generated */ const char* GenerateDisplayName(const char* op_type_ptr) { if (!op_type_ptr) { - return NULL; + return nullptr; } Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { - return NULL; + return op_type_ptr; } std::string op_type = std::string(op_type_ptr); std::string name = tid_to_op_type_[tid] + "::" + op_type; From 3e97083e61e1f611bcfb5bcb33736b9734d026de Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Wed, 19 Jun 2019 14:11:59 -0700 Subject: [PATCH 18/38] better desing style --- src/profiler/profiler.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 71b551a2e139..4dcd7787cdc3 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -790,7 +790,10 @@ struct ProfileTask : public ProfileDuration { NVTX_ONLY_CODE(nvtx_duration_.reset(new nvtx::NVTXDuration(name))); } - void setDomain(ProfileDomain *domain) { + /*! + * \brief Set the domain + */ + void setDomain(ProfileDomain* domain) { domain_ = domain; } @@ -1175,14 +1178,9 @@ struct ProfileOperator : public ProfileEvent { SetCategories(domain_.name()); ======= , attributes_(attributes) - , profiling_operator_(true) - , profiling_task_(true) { - if (strcmp(name, "Dummy_Wait") == 0) { - profiling_operator_ = false; - profiling_task_ = false; - } else if (strcmp(name, "Custom") == 0) { - profiling_task_ = false; - } else if (std::string(name).find("::") != std::string::npos) { + , profiling_operator_(strcmp(name, "Dummy_Wait")) + , profiling_task_(strcmp(name, "Dummy_Wait") && strcmp(name, "Custom")) { + if (std::string(name).find("::") != std::string::npos) { as_task_.setDomain(&custom_op_domain); SetCategories(custom_op_domain.name()); } else { @@ -1272,9 +1270,9 @@ struct ProfileOperator : public ProfileEvent { /*! \brief Optional operator attributes */ std::unique_ptr attributes_; /*! \brief Whether to profile as operator */ - bool profiling_operator_; + const bool profiling_operator_; /*! \brief Whether to profile as task */ - bool profiling_task_; + const bool profiling_task_; }; /* From 5b3ab96b001db2a7437fb5d83666948e46333435 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Wed, 19 Jun 2019 16:13:19 -0700 Subject: [PATCH 19/38] fix --- src/engine/naive_engine.cc | 4 ++-- src/engine/threaded_engine.cc | 4 ++-- src/operator/custom/custom-inl.h | 3 +-- src/profiler/custom_op_profiler.h | 7 +++---- src/profiler/profiler.h | 4 ++-- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index 30a37c73ffe3..80f0e1111349 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -164,8 +164,8 @@ class NaiveEngine final : public Engine { const bool profiling = opr_name && profiler->IsProfiling(profiler::Profiler::kImperative); // GenerateDisplayName() will return a pointer to the correct name of the operator const char* display_name = profiling ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) - : opr_name; + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : + opr_name; if (profiling) { opr = NewOperator(exec_fun, const_vars, mutable_vars, prop, display_name)->Cast(); diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index 48ab7d5cff80..ac1f112383e1 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -336,8 +336,8 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); // GenerateDisplayName() will return a pointer to the correct name of the operator const char* display_name = profiling ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) - : opr_name; + profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : + opr_name; ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, display_name, wait); opr->temporary = true; diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index e84e58ec3c18..f720d6605bd3 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -149,8 +149,7 @@ class CustomOperator { ctx.async_on_complete(); }, - ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0, - "Dummy_Wait"); + ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0, "Dummy_Wait"); }); // increase num_threads if there is not enough threads to execute custom operator if (q_.size() > num_free_threads_) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 6b0b9dd8cfe2..4d58dedaeaa4 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -68,6 +68,7 @@ class CustomOpProfiler { void OnCustomEnd() { const Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); + // Theoretically this check will never fail if we use onBegin and onEnd in pairs CHECK(tasks_.find(tid) != tasks_.end()); tasks_[tid]->stop(); tasks_.erase(tid); @@ -91,10 +92,8 @@ class CustomOpProfiler { if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { return op_type_ptr; } - std::string op_type = std::string(op_type_ptr); - std::string name = tid_to_op_type_[tid] + "::" + op_type; - display_names_.insert(name); - return display_names_.find(name)->c_str(); + std::string name = tid_to_op_type_[tid] + "::" + std::string(op_type_ptr); + return display_names_.insert(name).first->c_str(); } private: diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 4dcd7787cdc3..a5160f36358f 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1178,9 +1178,9 @@ struct ProfileOperator : public ProfileEvent { SetCategories(domain_.name()); ======= , attributes_(attributes) - , profiling_operator_(strcmp(name, "Dummy_Wait")) + , profiling_operator_(strcmp(name, "Dummy_Wait") && strcmp(name, "Custom")) , profiling_task_(strcmp(name, "Dummy_Wait") && strcmp(name, "Custom")) { - if (std::string(name).find("::") != std::string::npos) { + if (strstr(name, "::")) { as_task_.setDomain(&custom_op_domain); SetCategories(custom_op_domain.name()); } else { From b40b432500df2e623d1385ee36703f474a016fa2 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 20 Jun 2019 12:18:22 -0700 Subject: [PATCH 20/38] fix --- src/profiler/custom_op_profiler.h | 44 ++++++++++++++++---------- src/profiler/profiler.h | 32 ++++++++++++------- tests/python/unittest/test_profiler.py | 10 ++++-- 3 files changed, 57 insertions(+), 29 deletions(-) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 4d58dedaeaa4..4f0d006403e6 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -33,7 +33,7 @@ using TaskPtr = std::unique_ptr; /*! * \brief Singleton class to assist profiling python callback of custom operators - * and to assist linking sub-operators to custom operators + * and to assist linking sub-operators to custom operators */ class CustomOpProfiler { public: @@ -49,12 +49,12 @@ class CustomOpProfiler { } /*! * \brief Called before the callback of custom operators to start a profile task for python - * code execution time + * code execution time * \param op_type The registed name of the custom operator */ void OnCustomBegin(const std::string& op_type) { const Tid tid = std::this_thread::get_id(); - const std::string task_name = op_type + "::pure_python"; + const std::string task_name = MakePythonCodeName(op_type); std::lock_guard lock(mutex_); tid_to_op_type_[tid] = op_type; tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); @@ -63,13 +63,14 @@ class CustomOpProfiler { /*! * \brief Called after the callback of custom operators to stop the profile task for python - * code execution time + * code execution time */ void OnCustomEnd() { const Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); // Theoretically this check will never fail if we use onBegin and onEnd in pairs - CHECK(tasks_.find(tid) != tasks_.end()); + CHECK(tasks_.find(tid) != tasks_.end()) << "thread_id not found. " << + "Please use OnCustomBegin() and OnCustomEnd() in pairs on the same thread."; tasks_[tid]->stop(); tasks_.erase(tid); tid_to_op_type_.erase(tid); @@ -77,34 +78,45 @@ class CustomOpProfiler { /*! * \brief Generate a display name for sub-operators, which is the name used for OprBlock - * and later by profiler, and store it in a unordered_set so that it can be referenced - * in the future. - * Notice if the operator is not a sub-operator, just return the char pointer back. - * \param op_type_ptr The registed name of the operator + * and later by profiler, and store it in a unordered_set so that it can be referenced + * in the future. + * Notice if the operator is not a sub-operator, just return the char pointer back. + * \param op_type The registed name of the operator * \return Returns a pointer to the display name generated */ - const char* GenerateDisplayName(const char* op_type_ptr) { - if (!op_type_ptr) { + const char* GenerateDisplayName(const char* op_type) { + if (!op_type) { return nullptr; } Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { - return op_type_ptr; + return op_type; } - std::string name = tid_to_op_type_[tid] + "::" + std::string(op_type_ptr); + // + std::string name = MakeSubOperatorName(tid, op_type); return display_names_.insert(name).first->c_str(); } private: - /*! \brief class mutex*/ + /* !\brief make the display name for sub-operators */ + inline std::string MakeSubOperatorName(const Tid& tid, const char* op_type) { + return tid_to_op_type_[tid] + "::" + std::string(op_type); + } + /* !\brief make the display name for the pure python call back function i.e. + * forward() or backward() in the custom operator definition + */ + inline std::string MakePythonCodeName(const std::string& op_type) { + return op_type + "::pure_python"; + } + /*! \brief class mutex */ std::mutex mutex_; /* !\brief display names for sub-operators in custom ops */ std::unordered_set display_names_; - /* !\brief profiling tasks for pure python code in custom operators*/ + /* !\brief profiling tasks for pure python code in custom operators */ std::unordered_map tasks_; /* !\brief the maping from thread id to the registered name op the custom operator - * that is runnin on that thread + * that is runnin on that thread */ std::unordered_map tid_to_op_type_; }; diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index a5160f36358f..1f1d74bd276c 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1178,9 +1178,8 @@ struct ProfileOperator : public ProfileEvent { SetCategories(domain_.name()); ======= , attributes_(attributes) - , profiling_operator_(strcmp(name, "Dummy_Wait") && strcmp(name, "Custom")) - , profiling_task_(strcmp(name, "Dummy_Wait") && strcmp(name, "Custom")) { - if (strstr(name, "::")) { + , profiling(IsDeprecatedOperator(name)) { + if (IsSubOperatorOfCustom(name)) { as_task_.setDomain(&custom_op_domain); SetCategories(custom_op_domain.name()); } else { @@ -1196,19 +1195,19 @@ struct ProfileOperator : public ProfileEvent { void start(mxnet::Context::DeviceType dev_type, uint32_t dev_id) { dev_type_ = dev_type; dev_id_ = dev_id; - if (profiling_operator_) + if (profiling) { ProfileEvent::start(); - if (profiling_task_) as_task_.start(); + } } /*! * \brief Stop the profiling scope */ void stop() override { - if (profiling_task_) + if (profiling) { as_task_.stop(); - if (profiling_operator_) ProfileEvent::stop(); + } } /*! @@ -1257,6 +1256,19 @@ struct ProfileOperator : public ProfileEvent { start_time_, ProfileStat::NowInMicrosec(), attributes_.get()); } + /*! + * \brief Check if this operator is no longer profiled + * Notice that this operator may still be used for i.e. synchronization + */ + inline bool IsDeprecatedOperator(const char* name) { + return strcmp(name, "Dummy_Wait") && strcmp(name, "Custom"); + } + /*! + * \brief Check if this operator a sub-operator of a custom operator + */ + inline bool IsSubOperatorOfCustom(const char* name) { + return strstr(name, "::"); + } /*! \brief Also log the operator as a task in the operator domain */ ProfileTask as_task_; /* !\brief Operator name */ @@ -1269,10 +1281,8 @@ struct ProfileOperator : public ProfileEvent { static ProfileDomain domain_; /*! \brief Optional operator attributes */ std::unique_ptr attributes_; - /*! \brief Whether to profile as operator */ - const bool profiling_operator_; - /*! \brief Whether to profile as task */ - const bool profiling_task_; + /*! \brief Whether to profile or not */ + const bool profiling; }; /* diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 91d5b739dffd..31b4b2a3fbdc 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -23,6 +23,7 @@ import os import json from collections import OrderedDict +from common import run_in_spawned_process def enable_profiler(profile_filename, run=True, continuous_dump=False, aggregate_stats=False): profiler.set_config(profile_symbolic=True, @@ -291,7 +292,7 @@ def test_aggregate_duplication(): assert target_dict['Time']['operator']['_plus_scalar']['Count'] == 2 profiler.set_state('stop') -def test_custom_operator_profiling(): +def test_custom_operator_profiling(seed = None): class Sigmoid(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): x = in_data[0].asnumpy() @@ -344,7 +345,7 @@ def create_operator(self, ctx, in_shapes, in_dtypes): and 'MySigmoid::_zeros' in target_dict['Time']['Custom Operator'] profiler.set_state('stop') -def test_custom_operator_profiling_multiple_custom_ops(): +def test_custom_operator_profiling_multiple_custom_ops(seed = None): class MyAdd(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): self.assign(out_data[0], req[0], in_data[0] + 1) @@ -414,6 +415,11 @@ def create_operator(self, ctx, shapes, dtypes): and '_plus_scalar' in target_dict['Time']['operator'] profiler.set_state('stop') +def test_custom_operator_profiling_naive(): + # run the two tests above using Naive Engine + run_in_spawned_process(func = test_custom_operator_profiling, env = {'MXNET_ENGINE_TYPE' : "NaiveEngine"}) + run_in_spawned_process(func = test_custom_operator_profiling_multiple_custom_ops, env = {'MXNET_ENGINE_TYPE' : "NaiveEngine"}) + if __name__ == '__main__': import nose nose.runmodule() From 5fdaa5607f13126a1179906178447cbdd418fa50 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 20 Jun 2019 13:26:11 -0700 Subject: [PATCH 21/38] fix --- src/profiler/profiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 1f1d74bd276c..3b02215bd5a6 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1232,7 +1232,7 @@ struct ProfileOperator : public ProfileEvent { if (attributes) { name_.append(attributes->to_string().c_str()); } - if (std::string(name).find("::") != std::string::npos) { + if (IsSubOperatorOfCustom(const char* name)) { categories_.set(custom_op_domain.name()); } else { categories_.set("operator"); From 693fa23a882561b1bf44e8e4f1e8bc7c661b0ba5 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 20 Jun 2019 13:30:37 -0700 Subject: [PATCH 22/38] fix --- src/profiler/profiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 3b02215bd5a6..337ea9fe8a2d 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1258,7 +1258,7 @@ struct ProfileOperator : public ProfileEvent { } /*! * \brief Check if this operator is no longer profiled - * Notice that this operator may still be used for i.e. synchronization + * Notice that this operator may still be used for e.g synchronization */ inline bool IsDeprecatedOperator(const char* name) { return strcmp(name, "Dummy_Wait") && strcmp(name, "Custom"); From 602b3a212d678b4fc0b657ecaedae23581bb5a94 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 20 Jun 2019 13:44:27 -0700 Subject: [PATCH 23/38] fix --- src/profiler/profiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 337ea9fe8a2d..b4186121da1a 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1232,7 +1232,7 @@ struct ProfileOperator : public ProfileEvent { if (attributes) { name_.append(attributes->to_string().c_str()); } - if (IsSubOperatorOfCustom(const char* name)) { + if (IsSubOperatorOfCustom(name)) { categories_.set(custom_op_domain.name()); } else { categories_.set("operator"); From c4a2355e02d8ab513f5d6a30938ea44e977d6cb8 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 20 Jun 2019 14:21:47 -0700 Subject: [PATCH 24/38] fix --- src/profiler/profiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index b4186121da1a..cdd4b1d3e2b9 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1260,13 +1260,13 @@ struct ProfileOperator : public ProfileEvent { * \brief Check if this operator is no longer profiled * Notice that this operator may still be used for e.g synchronization */ - inline bool IsDeprecatedOperator(const char* name) { + inline static bool IsDeprecatedOperator(const char* name) { return strcmp(name, "Dummy_Wait") && strcmp(name, "Custom"); } /*! * \brief Check if this operator a sub-operator of a custom operator */ - inline bool IsSubOperatorOfCustom(const char* name) { + inline static bool IsSubOperatorOfCustom(const char* name) { return strstr(name, "::"); } /*! \brief Also log the operator as a task in the operator domain */ From 73b764baf1fdf71ec8ba785025149ea6f6937c0d Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Fri, 21 Jun 2019 11:32:59 -0700 Subject: [PATCH 25/38] add isprofiling check to onCustomStart --- src/engine/naive_engine.cc | 1 - src/profiler/custom_op_profiler.h | 11 ++++++++--- src/profiler/profiler.h | 5 +++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index 80f0e1111349..9f44b55cfcaf 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -160,7 +160,6 @@ class NaiveEngine final : public Engine { this->req_completed_ = false; profiler::Profiler *profiler = profiler::Profiler::Get(); NaiveOpr *opr = nullptr; - // Try to get a new name. Only not null for sub-operators of a custom operator const bool profiling = opr_name && profiler->IsProfiling(profiler::Profiler::kImperative); // GenerateDisplayName() will return a pointer to the correct name of the operator const char* display_name = profiling ? diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 4f0d006403e6..aa1c05fc712c 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -53,6 +53,10 @@ class CustomOpProfiler { * \param op_type The registed name of the custom operator */ void OnCustomBegin(const std::string& op_type) { + profiler::Profiler *profiler = profiler::Profiler::Get(); + if (!profiler->IsProfiling(profiler::Profiler::kImperative)) { + return; + } const Tid tid = std::this_thread::get_id(); const std::string task_name = MakePythonCodeName(op_type); std::lock_guard lock(mutex_); @@ -68,9 +72,10 @@ class CustomOpProfiler { void OnCustomEnd() { const Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); - // Theoretically this check will never fail if we use onBegin and onEnd in pairs - CHECK(tasks_.find(tid) != tasks_.end()) << "thread_id not found. " << - "Please use OnCustomBegin() and OnCustomEnd() in pairs on the same thread."; + // this can happen if we are not profiling + if (tasks_.find(tid) == tasks_.end()) { + return; + } tasks_[tid]->stop(); tasks_.erase(tid); tid_to_op_type_.erase(tid); diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index cdd4b1d3e2b9..7e519b9ab58f 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1178,7 +1178,7 @@ struct ProfileOperator : public ProfileEvent { SetCategories(domain_.name()); ======= , attributes_(attributes) - , profiling(IsDeprecatedOperator(name)) { + , profiling(!IsDeprecatedOperator(name)) { if (IsSubOperatorOfCustom(name)) { as_task_.setDomain(&custom_op_domain); SetCategories(custom_op_domain.name()); @@ -1261,7 +1261,8 @@ struct ProfileOperator : public ProfileEvent { * Notice that this operator may still be used for e.g synchronization */ inline static bool IsDeprecatedOperator(const char* name) { - return strcmp(name, "Dummy_Wait") && strcmp(name, "Custom"); + return strcmp(name, "Dummy_Wait") == 0 || + strcmp(name, "Custom") == 0 || strcmp(name, "_backward_Custom") == 0; } /*! * \brief Check if this operator a sub-operator of a custom operator From afce2f0f36a4b4482cc81ca24c746ca6f9b139e0 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Fri, 21 Jun 2019 15:36:52 -0700 Subject: [PATCH 26/38] fix --- src/engine/threaded_engine.cc | 11 +++++------ src/profiler/custom_op_profiler.h | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc index ac1f112383e1..4375149c729c 100644 --- a/src/engine/threaded_engine.cc +++ b/src/engine/threaded_engine.cc @@ -287,8 +287,11 @@ void ThreadedEngine::DeleteOperator(OprHandle op) { void ThreadedEngine::Push(OprHandle op, Context exec_ctx, int priority, bool profiling) { BulkFlush(); - ThreadedOpr* threaded_opr = ThreadedOpr::CastFromBase(op); + if (profiling) { + threaded_opr->opr_name = + profiler::CustomOpProfiler::Get()->GenerateDisplayName(threaded_opr->opr_name); + } OprBlock* opr_block = OprBlock::New(); opr_block->opr = threaded_opr; @@ -334,12 +337,8 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx, } #endif const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative); - // GenerateDisplayName() will return a pointer to the correct name of the operator - const char* display_name = profiling ? - profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) : - opr_name; ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, - prop, display_name, wait); + prop, opr_name, wait); opr->temporary = true; Push(opr, exec_ctx, priority, profiling); } diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index aa1c05fc712c..0bafeab36d23 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -98,7 +98,6 @@ class CustomOpProfiler { if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) { return op_type; } - // std::string name = MakeSubOperatorName(tid, op_type); return display_names_.insert(name).first->c_str(); } From 08a4d5440b733820d3a429672221c4b970fc3fb7 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 10:49:41 -0700 Subject: [PATCH 27/38] rename dummy_wait --- src/operator/custom/custom-inl.h | 2 +- src/profiler/profiler.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index f720d6605bd3..3864c4be168d 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -149,7 +149,7 @@ class CustomOperator { ctx.async_on_complete(); }, - ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0, "Dummy_Wait"); + ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0, "CustomOperatorWait"); }); // increase num_threads if there is not enough threads to execute custom operator if (q_.size() > num_free_threads_) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index 7e519b9ab58f..ea9927674de4 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1261,7 +1261,7 @@ struct ProfileOperator : public ProfileEvent { * Notice that this operator may still be used for e.g synchronization */ inline static bool IsDeprecatedOperator(const char* name) { - return strcmp(name, "Dummy_Wait") == 0 || + return strcmp(name, "CustomOperatorWait") == 0 || strcmp(name, "Custom") == 0 || strcmp(name, "_backward_Custom") == 0; } /*! From 51f5b1a2de1184c6346bb5944f16b9b3d6f479d8 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 10:58:25 -0700 Subject: [PATCH 28/38] fix conflict --- src/profiler/profiler.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index ea9927674de4..aeb04606f11b 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1171,12 +1171,6 @@ struct ProfileOperator : public ProfileEvent { : ProfileEvent(name) , as_task_(name, &domain_) , name_(name) -<<<<<<< HEAD - , attributes_(attributes) { - // make as_task_ not to add stat to AggregateStats; otherwise we will add twice - as_task_.enableAggregateStats(false); - SetCategories(domain_.name()); -======= , attributes_(attributes) , profiling(!IsDeprecatedOperator(name)) { if (IsSubOperatorOfCustom(name)) { @@ -1185,7 +1179,8 @@ struct ProfileOperator : public ProfileEvent { } else { SetCategories(domain_.name()); } ->>>>>>> working version + // make as_task_ not to add stat to AggregateStats; otherwise we will add twice + as_task_.enableAggregateStats(false); } /*! * \brief Start the profiling scope From 4e5e14c08d610b377ffca05219cef5296e165675 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 12:06:53 -0700 Subject: [PATCH 29/38] improve test --- tests/python/unittest/test_profiler.py | 52 +++++++++++++++++++------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 31b4b2a3fbdc..c783a2ed1545 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -292,7 +292,7 @@ def test_aggregate_duplication(): assert target_dict['Time']['operator']['_plus_scalar']['Count'] == 2 profiler.set_state('stop') -def test_custom_operator_profiling(seed = None): +def test_custom_operator_profiling(seed = None, file_name = None): class Sigmoid(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): x = in_data[0].asnumpy() @@ -327,7 +327,8 @@ def infer_shape(self, in_shapes): def create_operator(self, ctx, in_shapes, in_dtypes): return Sigmoid() - file_name = 'test_custom_operator_profiling.json' + if file_name is None: + file_name = 'test_custom_operator_profiling.json' enable_profiler(profile_filename = file_name, run=True, continuous_dump=True,\ aggregate_stats=True) x = mx.nd.array([0, 1, 2, 3]) @@ -345,7 +346,8 @@ def create_operator(self, ctx, in_shapes, in_dtypes): and 'MySigmoid::_zeros' in target_dict['Time']['Custom Operator'] profiler.set_state('stop') -def test_custom_operator_profiling_multiple_custom_ops(seed = None): +def test_custom_operator_profiling_multiple_custom_ops_imperative(seed = None, \ + mode = 'imperative', file_name = None): class MyAdd(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): self.assign(out_data[0], req[0], in_data[0] + 1) @@ -359,7 +361,7 @@ def __init__(self): super(MyAdd1Prop, self).__init__(need_top_grad=True) def list_arguments(self): - return ['in'] + return ['data'] def list_outputs(self): return ['output'] @@ -377,7 +379,7 @@ def __init__(self): super(MyAdd2Prop, self).__init__(need_top_grad=True) def list_arguments(self): - return ['in'] + return ['data'] def list_outputs(self): return ['output'] @@ -389,13 +391,23 @@ def infer_shape(self, in_shape): def create_operator(self, ctx, shapes, dtypes): return MyAdd() - file_name = 'test_custom_operator_profiling_multiple_custom_ops.json' + if file_name is None: + file_name = 'test_custom_operator_profiling_multiple_custom_ops_imperative.json' enable_profiler(profile_filename = file_name, run=True, continuous_dump=True,\ aggregate_stats=True) inp = mx.nd.zeros(shape=(100, 100)) - x = inp + 1 - y = mx.nd.Custom(inp, op_type='MyAdd1') - z = mx.nd.Custom(inp, op_type='MyAdd2') + if mode == 'imperative': + x = inp + 1 + y = mx.nd.Custom(inp, op_type='MyAdd1') + z = mx.nd.Custom(inp, op_type='MyAdd2') + elif mode == 'symbolic': + a = mx.symbol.Variable('a') + b = a + 1 + c = mx.symbol.Custom(data=a, op_type='MyAdd1') + d = mx.symbol.Custom(data=a, op_type='MyAdd2') + b.bind(mx.cpu(), {'a': inp}).forward() + c.bind(mx.cpu(), {'a': inp}).forward() + d.bind(mx.cpu(), {'a': inp}).forward() mx.nd.waitall() profiler.dump(False) debug_str = profiler.dumps(format = 'json') @@ -415,10 +427,24 @@ def create_operator(self, ctx, shapes, dtypes): and '_plus_scalar' in target_dict['Time']['operator'] profiler.set_state('stop') -def test_custom_operator_profiling_naive(): - # run the two tests above using Naive Engine - run_in_spawned_process(func = test_custom_operator_profiling, env = {'MXNET_ENGINE_TYPE' : "NaiveEngine"}) - run_in_spawned_process(func = test_custom_operator_profiling_multiple_custom_ops, env = {'MXNET_ENGINE_TYPE' : "NaiveEngine"}) +def test_custom_operator_profiling_multiple_custom_ops_symbolic(seed = None, file_name = None): + if file_name is None: + 'test_custom_operator_profiling_multiple_custom_ops_symbolic.json' + test_custom_operator_profiling_multiple_custom_ops_imperative(mode = 'symbolic', \ + file_name = file_name) + +def test_custom_operator_profiling_naive_engine(): + # run the three tests above using Naive Engine + run_in_spawned_process(test_custom_operator_profiling, \ + {'MXNET_ENGINE_TYPE' : "NaiveEngine"}, \ + 'test_custom_operator_profiling_naive.json') + run_in_spawned_process(test_custom_operator_profiling_multiple_custom_ops_imperative, \ + {'MXNET_ENGINE_TYPE' : "NaiveEngine"}, \ + 'imperative', \ + 'test_custom_operator_profiling_multiple_custom_ops_imperative_naive.json') + run_in_spawned_process(test_custom_operator_profiling_multiple_custom_ops_symbolic, \ + {'MXNET_ENGINE_TYPE' : "NaiveEngine"}, \ + 'test_custom_operator_profiling_multiple_custom_ops_symbolic_naive.json') if __name__ == '__main__': import nose From e9a79820b6d0243afd7c173a89f5b78ad97d625a Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 13:22:06 -0700 Subject: [PATCH 30/38] fix --- tests/python/unittest/test_profiler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index c783a2ed1545..3827dfa69ed4 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -411,6 +411,8 @@ def create_operator(self, ctx, shapes, dtypes): mx.nd.waitall() profiler.dump(False) debug_str = profiler.dumps(format = 'json') + print(file_name) + print(debug_str) target_dict = json.loads(debug_str) ''' We are calling _plus_scalar within MyAdd1 and MyAdd2 and outside both the custom From 79b4e0088eb2897357f15491f887374b4853181a Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 14:31:09 -0700 Subject: [PATCH 31/38] fix test cases --- tests/python/unittest/test_profiler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 3827dfa69ed4..392fbaf176d3 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -411,8 +411,6 @@ def create_operator(self, ctx, shapes, dtypes): mx.nd.waitall() profiler.dump(False) debug_str = profiler.dumps(format = 'json') - print(file_name) - print(debug_str) target_dict = json.loads(debug_str) ''' We are calling _plus_scalar within MyAdd1 and MyAdd2 and outside both the custom @@ -445,7 +443,9 @@ def test_custom_operator_profiling_naive_engine(): 'imperative', \ 'test_custom_operator_profiling_multiple_custom_ops_imperative_naive.json') run_in_spawned_process(test_custom_operator_profiling_multiple_custom_ops_symbolic, \ - {'MXNET_ENGINE_TYPE' : "NaiveEngine"}, \ + {'MXNET_ENGINE_TYPE' : "NaiveEngine", \ + 'MXNET_EXEC_BULK_EXEC_INFERENCE' : 0, \ + 'MXNET_EXEC_BULK_EXEC_TRAIN' : 0}, \ 'test_custom_operator_profiling_multiple_custom_ops_symbolic_naive.json') if __name__ == '__main__': From ba22ddaffe79815490e585d9c21518b9c607fb76 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 16:46:23 -0700 Subject: [PATCH 32/38] fix test cases --- tests/python/unittest/test_profiler.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 392fbaf176d3..6317640121e1 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -411,6 +411,7 @@ def create_operator(self, ctx, shapes, dtypes): mx.nd.waitall() profiler.dump(False) debug_str = profiler.dumps(format = 'json') + print(debug_str) target_dict = json.loads(debug_str) ''' We are calling _plus_scalar within MyAdd1 and MyAdd2 and outside both the custom @@ -427,11 +428,12 @@ def create_operator(self, ctx, shapes, dtypes): and '_plus_scalar' in target_dict['Time']['operator'] profiler.set_state('stop') -def test_custom_operator_profiling_multiple_custom_ops_symbolic(seed = None, file_name = None): - if file_name is None: - 'test_custom_operator_profiling_multiple_custom_ops_symbolic.json' - test_custom_operator_profiling_multiple_custom_ops_imperative(mode = 'symbolic', \ - file_name = file_name) +def test_custom_operator_profiling_multiple_custom_ops_symbolic(): + run_in_spawned_process(test_custom_operator_profiling_multiple_custom_ops_imperative, \ + {'MXNET_EXEC_BULK_EXEC_INFERENCE' : 0, \ + 'MXNET_EXEC_BULK_EXEC_TRAIN' : 0}, \ + 'symbolic', \ + 'test_custom_operator_profiling_multiple_custom_ops_symbolic.json') def test_custom_operator_profiling_naive_engine(): # run the three tests above using Naive Engine @@ -442,10 +444,11 @@ def test_custom_operator_profiling_naive_engine(): {'MXNET_ENGINE_TYPE' : "NaiveEngine"}, \ 'imperative', \ 'test_custom_operator_profiling_multiple_custom_ops_imperative_naive.json') - run_in_spawned_process(test_custom_operator_profiling_multiple_custom_ops_symbolic, \ + run_in_spawned_process(test_custom_operator_profiling_multiple_custom_ops_imperative, \ {'MXNET_ENGINE_TYPE' : "NaiveEngine", \ 'MXNET_EXEC_BULK_EXEC_INFERENCE' : 0, \ 'MXNET_EXEC_BULK_EXEC_TRAIN' : 0}, \ + 'symbolic', \ 'test_custom_operator_profiling_multiple_custom_ops_symbolic_naive.json') if __name__ == '__main__': From 2db1243b40a8defa011bf76ae65b2f7cce999565 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Mon, 24 Jun 2019 21:47:25 -0700 Subject: [PATCH 33/38] fix testcases --- src/profiler/custom_op_profiler.h | 8 ++++---- tests/python/unittest/test_profiler.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 0bafeab36d23..93ae3a64236f 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -54,13 +54,13 @@ class CustomOpProfiler { */ void OnCustomBegin(const std::string& op_type) { profiler::Profiler *profiler = profiler::Profiler::Get(); - if (!profiler->IsProfiling(profiler::Profiler::kImperative)) { - return; - } const Tid tid = std::this_thread::get_id(); const std::string task_name = MakePythonCodeName(op_type); std::lock_guard lock(mutex_); tid_to_op_type_[tid] = op_type; + if (!profiler->IsProfiling(profiler::Profiler::kImperative)) { + return; + } tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); tasks_[tid]->start(); } @@ -72,13 +72,13 @@ class CustomOpProfiler { void OnCustomEnd() { const Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); + tid_to_op_type_.erase(tid); // this can happen if we are not profiling if (tasks_.find(tid) == tasks_.end()) { return; } tasks_[tid]->stop(); tasks_.erase(tid); - tid_to_op_type_.erase(tid); } /*! diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 6317640121e1..ab7d29f104ca 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -411,7 +411,6 @@ def create_operator(self, ctx, shapes, dtypes): mx.nd.waitall() profiler.dump(False) debug_str = profiler.dumps(format = 'json') - print(debug_str) target_dict = json.loads(debug_str) ''' We are calling _plus_scalar within MyAdd1 and MyAdd2 and outside both the custom From 0c4294b73215d0b1e021fed1ca8c5391a252820a Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 25 Jun 2019 12:07:54 -0700 Subject: [PATCH 34/38] revert back to reduce overhead --- src/operator/custom/custom-inl.h | 22 ++++++++++++++++------ src/profiler/custom_op_profiler.h | 11 +++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index 3864c4be168d..ea4580609407 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -80,9 +80,14 @@ class CustomOperator { const std::vector& outputs, const std::string op_type = "") { if (naive_engine_) { - profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); - func(); - profiler::CustomOpProfiler::Get()->OnCustomEnd(); + if (profiler::Profiler::Get()->IsProfiling(profiler::Profiler::kImperative)) { + profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); + func(); + profiler::CustomOpProfiler::Get()->OnCustomEnd(); + } + else { + func(); + } for (size_t i = 0, out_idx = 0; i < arrs.size(); i++) { if (arrs[i].storage_type() == kDefaultStorage || arrs[i].storage_type() == kUndefinedStorage) @@ -101,9 +106,14 @@ class CustomOperator { bool prev_training = Imperative::Get()->set_is_training(training); try { - profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); - func(); - profiler::CustomOpProfiler::Get()->OnCustomEnd(); + if (profiler::Profiler::Get()->IsProfiling(profiler::Profiler::kImperative)) { + profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); + func(); + profiler::CustomOpProfiler::Get()->OnCustomEnd(); + } + else { + func(); + } } catch (dmlc::Error& e) { exception_ = std::make_shared(std::current_exception()); diff --git a/src/profiler/custom_op_profiler.h b/src/profiler/custom_op_profiler.h index 93ae3a64236f..9449e5b84a3a 100644 --- a/src/profiler/custom_op_profiler.h +++ b/src/profiler/custom_op_profiler.h @@ -53,14 +53,10 @@ class CustomOpProfiler { * \param op_type The registed name of the custom operator */ void OnCustomBegin(const std::string& op_type) { - profiler::Profiler *profiler = profiler::Profiler::Get(); const Tid tid = std::this_thread::get_id(); const std::string task_name = MakePythonCodeName(op_type); std::lock_guard lock(mutex_); tid_to_op_type_[tid] = op_type; - if (!profiler->IsProfiling(profiler::Profiler::kImperative)) { - return; - } tasks_[tid] = std::make_unique(task_name.c_str(), &custom_op_domain); tasks_[tid]->start(); } @@ -73,10 +69,9 @@ class CustomOpProfiler { const Tid tid = std::this_thread::get_id(); std::lock_guard lock(mutex_); tid_to_op_type_.erase(tid); - // this can happen if we are not profiling - if (tasks_.find(tid) == tasks_.end()) { - return; - } + // this should never fail + CHECK(tasks_.find(tid) != tasks_.end()) << "thread_id not found. " << + "Please use OnCustomBegin() and OnCustomEnd() in pairs."; tasks_[tid]->stop(); tasks_.erase(tid); } From c939067c8f28b63fad47680bd0b433939b5bb744 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 25 Jun 2019 12:13:57 -0700 Subject: [PATCH 35/38] fix style --- src/operator/custom/custom-inl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/operator/custom/custom-inl.h b/src/operator/custom/custom-inl.h index ea4580609407..1173b14eb2fe 100644 --- a/src/operator/custom/custom-inl.h +++ b/src/operator/custom/custom-inl.h @@ -84,8 +84,7 @@ class CustomOperator { profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); func(); profiler::CustomOpProfiler::Get()->OnCustomEnd(); - } - else { + } else { func(); } for (size_t i = 0, out_idx = 0; i < arrs.size(); i++) { @@ -110,8 +109,7 @@ class CustomOperator { profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type); func(); profiler::CustomOpProfiler::Get()->OnCustomEnd(); - } - else { + } else { func(); } } catch (dmlc::Error& e) { From 0f5e47e92ccfe9c8b6b076595ea0cc7e5ac6c4ca Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Tue, 25 Jun 2019 18:14:15 -0700 Subject: [PATCH 36/38] Re-Trigger build From 8aac853c5dd578d10121538e5e00f039b112c88a Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 27 Jun 2019 19:01:04 -0700 Subject: [PATCH 37/38] rename var --- src/profiler/profiler.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index aeb04606f11b..49ef99da5bba 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1172,7 +1172,7 @@ struct ProfileOperator : public ProfileEvent { , as_task_(name, &domain_) , name_(name) , attributes_(attributes) - , profiling(!IsDeprecatedOperator(name)) { + , profiling_(!IsDeprecatedOperator(name)) { if (IsSubOperatorOfCustom(name)) { as_task_.setDomain(&custom_op_domain); SetCategories(custom_op_domain.name()); @@ -1190,7 +1190,7 @@ struct ProfileOperator : public ProfileEvent { void start(mxnet::Context::DeviceType dev_type, uint32_t dev_id) { dev_type_ = dev_type; dev_id_ = dev_id; - if (profiling) { + if (profiling_) { ProfileEvent::start(); as_task_.start(); } @@ -1199,7 +1199,7 @@ struct ProfileOperator : public ProfileEvent { * \brief Stop the profiling scope */ void stop() override { - if (profiling) { + if (profiling_) { as_task_.stop(); ProfileEvent::stop(); } @@ -1278,7 +1278,7 @@ struct ProfileOperator : public ProfileEvent { /*! \brief Optional operator attributes */ std::unique_ptr attributes_; /*! \brief Whether to profile or not */ - const bool profiling; + const bool profiling_; }; /* From 32f5eafe05457d2e38df6bc8bfca1baab1068b55 Mon Sep 17 00:00:00 2001 From: zha0q1 Date: Thu, 27 Jun 2019 21:49:25 -0700 Subject: [PATCH 38/38] Re-Trigger build