Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Custom Operator Profiling Enhancement #15210

Merged
merged 38 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
cb8479c
working version
Zha0q1 Jun 11, 2019
d549f5e
style fix
Zha0q1 Jun 11, 2019
e57ebd5
several fixes
Zha0q1 Jun 12, 2019
ed1794e
resolve issues in the comments
Zha0q1 Jun 14, 2019
5cf59fd
revert to using thread-safe Get() for singleton class CustomOpProfiler
Zha0q1 Jun 14, 2019
b9126a7
indentation
Zha0q1 Jun 14, 2019
ed93849
Now supports Naive Engine
Zha0q1 Jun 17, 2019
cb21b2b
style fix
Zha0q1 Jun 17, 2019
e21f18e
tidiness
Zha0q1 Jun 17, 2019
c908119
tests added
Zha0q1 Jun 17, 2019
51b862a
style fix
Zha0q1 Jun 17, 2019
36fcf62
add a new test case which has multiple custom ops
Zha0q1 Jun 17, 2019
e553a38
testcases fix
Zha0q1 Jun 18, 2019
9f1275d
fix
Zha0q1 Jun 18, 2019
0fd48f5
fix style
Zha0q1 Jun 18, 2019
9820bac
minor naive engine fix
Zha0q1 Jun 19, 2019
89de9f4
simplify some branching logic
Zha0q1 Jun 19, 2019
3e97083
better desing style
Zha0q1 Jun 19, 2019
5b3ab96
fix
Zha0q1 Jun 19, 2019
b40b432
fix
Zha0q1 Jun 20, 2019
5fdaa56
fix
Zha0q1 Jun 20, 2019
693fa23
fix
Zha0q1 Jun 20, 2019
602b3a2
fix
Zha0q1 Jun 20, 2019
c4a2355
fix
Zha0q1 Jun 20, 2019
73b764b
add isprofiling check to onCustomStart
Zha0q1 Jun 21, 2019
afce2f0
fix
Zha0q1 Jun 21, 2019
08a4d54
rename dummy_wait
Zha0q1 Jun 24, 2019
51f5b1a
fix conflict
Zha0q1 Jun 24, 2019
4e5e14c
improve test
Zha0q1 Jun 24, 2019
e9a7982
fix
Zha0q1 Jun 24, 2019
79b4e00
fix test cases
Zha0q1 Jun 24, 2019
ba22dda
fix test cases
Zha0q1 Jun 24, 2019
2db1243
fix testcases
Zha0q1 Jun 25, 2019
0c4294b
revert back to reduce overhead
Zha0q1 Jun 25, 2019
c939067
fix style
Zha0q1 Jun 25, 2019
0f5e47e
Re-Trigger build
Zha0q1 Jun 26, 2019
8aac853
rename var
Zha0q1 Jun 28, 2019
32f5eaf
Re-Trigger build
Zha0q1 Jun 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/engine/naive_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "../profiler/profiler.h"
#include "./openmp.h"
#include "../common/object_pool.h"
#include "../profiler/custom_op_profiler.h"

namespace mxnet {
namespace engine {
Expand Down Expand Up @@ -160,15 +161,19 @@ class NaiveEngine final : public Engine {
profiler::Profiler *profiler = profiler::Profiler::Get();
NaiveOpr *opr = nullptr;
const bool profiling = opr_name && profiler->IsProfiling(profiler::Profiler::kImperative);
// GenerateDisplayName() will return a pointer to the correct name of the operator
const char* display_name = profiling ?
profiler::CustomOpProfiler::Get()->GenerateDisplayName(opr_name) :
opr_name;
if (profiling) {
opr = NewOperator(exec_fun, const_vars, mutable_vars,
Zha0q1 marked this conversation as resolved.
Show resolved Hide resolved
prop, opr_name)->Cast<NaiveOpr>();
prop, display_name)->Cast<NaiveOpr>();
opr->profiling = profiling;
std::unique_ptr<profiler::ProfileOperator::Attributes> attrs;
if (profiler->AggregateEnabled()) {
attrs.reset(new profiler::ProfileOperator::Attributes());
}
opr->opr_profile.reset(new profiler::ProfileOperator(opr->opr_name, attrs.release()));
opr->opr_profile.reset(new profiler::ProfileOperator(display_name, attrs.release()));
opr->opr_profile->start(exec_ctx.dev_type, exec_ctx.dev_id);
}
if (exec_ctx.dev_mask() == gpu::kDevMask) {
Expand Down
10 changes: 7 additions & 3 deletions src/engine/threaded_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,11 @@ void ThreadedEngine::DeleteOperator(OprHandle op) {

void ThreadedEngine::Push(OprHandle op, Context exec_ctx, int priority, bool profiling) {
BulkFlush();

ThreadedOpr* threaded_opr = ThreadedOpr::CastFromBase(op);
if (profiling) {
threaded_opr->opr_name =
profiler::CustomOpProfiler::Get()->GenerateDisplayName(threaded_opr->opr_name);
}
OprBlock* opr_block = OprBlock::New();
opr_block->opr = threaded_opr;

Expand Down Expand Up @@ -333,9 +336,10 @@ void ThreadedEngine::PushAsync(AsyncFn fn, Context exec_ctx,
<< device_count_;
}
#endif
ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars, prop, opr_name, wait);
opr->temporary = true;
const bool profiling = profiler_->IsProfiling(profiler::Profiler::kImperative);
ThreadedOpr *opr = NewOperator(std::move(fn), const_vars, mutable_vars,
prop, opr_name, wait);
opr->temporary = true;
Push(opr, exec_ctx, priority, profiling);
}

Expand Down
1 change: 1 addition & 0 deletions src/engine/threaded_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "../profiler/profiler.h"
#include "./openmp.h"
#include "../common/object_pool.h"
#include "../profiler/custom_op_profiler.h"

namespace mxnet {
namespace engine {
Expand Down
23 changes: 18 additions & 5 deletions src/operator/custom/custom-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <condition_variable>
#include <queue>
#include "../operator_common.h"
#include "../../profiler/custom_op_profiler.h"

namespace mxnet {
namespace op {
Expand Down Expand Up @@ -76,9 +77,16 @@ class CustomOperator {
bool training, const std::vector<NDArray>& arrs,
const std::vector<int>& tags,
const std::unordered_set<int>& output_tags,
const std::vector<NDArray>& outputs) {
const std::vector<NDArray>& outputs,
const std::string op_type = "") {
if (naive_engine_) {
func();
if (profiler::Profiler::Get()->IsProfiling(profiler::Profiler::kImperative)) {
profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type);
func();
profiler::CustomOpProfiler::Get()->OnCustomEnd();
} else {
func();
}
for (size_t i = 0, out_idx = 0; i < arrs.size(); i++) {
if (arrs[i].storage_type() == kDefaultStorage ||
arrs[i].storage_type() == kUndefinedStorage)
Expand All @@ -97,7 +105,13 @@ class CustomOperator {
bool prev_training = Imperative::Get()->set_is_training(training);

try {
func();
if (profiler::Profiler::Get()->IsProfiling(profiler::Profiler::kImperative)) {
profiler::CustomOpProfiler::Get()->OnCustomBegin(op_type);
func();
profiler::CustomOpProfiler::Get()->OnCustomEnd();
} else {
func();
}
} catch (dmlc::Error& e) {
exception_ =
std::make_shared<std::exception_ptr>(std::current_exception());
Expand Down Expand Up @@ -143,8 +157,7 @@ class CustomOperator {

ctx.async_on_complete();
},
ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0,
"CustomOperator");
ctx.run_ctx.ctx, vars, vars2, FnProperty::kNoSkip, 0, "CustomOperatorWait");
});
// increase num_threads if there is not enough threads to execute custom operator
if (q_.size() > num_free_threads_)
Expand Down
4 changes: 2 additions & 2 deletions src/operator/custom/custom.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ void ForwardEx(const OpStatePtr& state, const OpContext& ctx,
static_cast<int>(ctx.is_train),
params.info->contexts[kCustomOpForward]));
},
ctx, false, ctx.is_train, cpys, tags, output_tags, outputs);
ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, params.op_type);
}

void BackwardEx(const OpStatePtr& state, const OpContext& ctx,
Expand Down Expand Up @@ -415,7 +415,7 @@ void BackwardEx(const OpStatePtr& state, const OpContext& ctx,
ptrs.size(), const_cast<void**>(ptrs.data()), const_cast<int*>(tags.data()),
reinterpret_cast<const int*>(req.data()), static_cast<int>(ctx.is_train),
params.info->contexts[kCustomOpBackward]));
}, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs);
}, ctx, false, ctx.is_train, cpys, tags, output_tags, outputs, "_backward_" + params.op_type);
}

// infer storage backward function for custom op which assigns kDefaultStorage for
Expand Down
125 changes: 125 additions & 0 deletions src/profiler/custom_op_profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef MXNET_PROFILER_CUSTOM_OP_PROFILER_H_
#define MXNET_PROFILER_CUSTOM_OP_PROFILER_H_

#include <string>
#include <unordered_set>
#include <unordered_map>
#include <thread>
#include "./profiler.h"

namespace mxnet {
namespace profiler {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Custom profiler doesnt respect start and stop for profiling currently. This means that irrespective of profiler start stop this will profile. You can take a look at device storage profiler for doing similar checks -> mode should be kSymbolic | kImperative.

Copy link
Contributor Author

@Zha0q1 Zha0q1 Jun 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a check profiler->IsProfiling(profiler::Profiler::kImperative), which was the same check used for profiling other operators

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay , what happens for symbolic case ? does it fallback to old custom op display for profiling or does it not display anything at all for custom op.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay , what happens for symbolic case ? does it fallback to old custom op display for profiling or does it not display anything at all for custom op.

Currently if you set profile_imperative to False, then you will have no profiling events for pure_python, aka the forward and backward callbacks. This is the case for both symbolic and imperative mode. I think this should be fine though. Considering that within custom operators,
users are mostly calling regular NDArray operators in imperative mode, I think if they ever want to peek inside custom operators, they must set profile_imperative to True.

To sum up, if profile_imperative == True, uses would have all the fine grained profiling events including pure_python and sub-operators. If profile_imperative == False, they would nothing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think profile_symbolic should also have this info, otherwise users may have a symbolic model with custom op but won't see any info on custom op.


using Tid = std::thread::id;
using TaskPtr = std::unique_ptr<ProfileTask>;

/*!
* \brief Singleton class to assist profiling python callback of custom operators
* and to assist linking sub-operators to custom operators
*/
class CustomOpProfiler {
Zha0q1 marked this conversation as resolved.
Show resolved Hide resolved
public:
static CustomOpProfiler* Get() {
static std::mutex mtx;
static std::unique_ptr<CustomOpProfiler> prof = nullptr;
if (!prof) {
std::unique_lock<std::mutex> lk(mtx);
if (!prof)
prof = std::make_unique<CustomOpProfiler>();
}
return prof.get();
}
/*!
* \brief Called before the callback of custom operators to start a profile task for python
* code execution time
* \param op_type The registed name of the custom operator
*/
void OnCustomBegin(const std::string& op_type) {
const Tid tid = std::this_thread::get_id();
const std::string task_name = MakePythonCodeName(op_type);
std::lock_guard<std::mutex> lock(mutex_);
tid_to_op_type_[tid] = op_type;
tasks_[tid] = std::make_unique<ProfileTask>(task_name.c_str(), &custom_op_domain);
tasks_[tid]->start();
}

/*!
* \brief Called after the callback of custom operators to stop the profile task for python
* code execution time
*/
void OnCustomEnd() {
const Tid tid = std::this_thread::get_id();
std::lock_guard<std::mutex> lock(mutex_);
tid_to_op_type_.erase(tid);
// this should never fail
CHECK(tasks_.find(tid) != tasks_.end()) << "thread_id not found. " <<
"Please use OnCustomBegin() and OnCustomEnd() in pairs.";
tasks_[tid]->stop();
tasks_.erase(tid);
}

/*!
* \brief Generate a display name for sub-operators, which is the name used for OprBlock
* and later by profiler, and store it in a unordered_set so that it can be referenced
* in the future.
* Notice if the operator is not a sub-operator, just return the char pointer back.
* \param op_type The registed name of the operator
* \return Returns a pointer to the display name generated
*/
const char* GenerateDisplayName(const char* op_type) {
if (!op_type) {
return nullptr;
}
Tid tid = std::this_thread::get_id();
std::lock_guard<std::mutex> lock(mutex_);
if (tid_to_op_type_.find(tid) == tid_to_op_type_.end()) {
return op_type;
}
Zha0q1 marked this conversation as resolved.
Show resolved Hide resolved
std::string name = MakeSubOperatorName(tid, op_type);
return display_names_.insert(name).first->c_str();
Zha0q1 marked this conversation as resolved.
Show resolved Hide resolved
}

private:
/* !\brief make the display name for sub-operators */
inline std::string MakeSubOperatorName(const Tid& tid, const char* op_type) {
return tid_to_op_type_[tid] + "::" + std::string(op_type);
}
/* !\brief make the display name for the pure python call back function i.e.
* forward() or backward() in the custom operator definition
*/
inline std::string MakePythonCodeName(const std::string& op_type) {
return op_type + "::pure_python";
}
/*! \brief class mutex */
std::mutex mutex_;
/* !\brief display names for sub-operators in custom ops */
std::unordered_set<std::string> display_names_;
/* !\brief profiling tasks for pure python code in custom operators */
std::unordered_map<Tid, TaskPtr> tasks_;
/* !\brief the maping from thread id to the registered name op the custom operator
* that is runnin on that thread
*/
std::unordered_map<Tid, std::string> tid_to_op_type_;
};
} // namespace profiler
} // namespace mxnet

#endif // MXNET_PROFILER_CUSTOM_OP_PROFILER_H_
53 changes: 46 additions & 7 deletions src/profiler/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,13 @@ struct ProfileTask : public ProfileDuration {
NVTX_ONLY_CODE(nvtx_duration_.reset(new nvtx::NVTXDuration(name)));
}

/*!
* \brief Set the domain
*/
void setDomain(ProfileDomain* domain) {
domain_ = domain;
}

/*!
* \brief Start the profiling scope
*/
Expand Down Expand Up @@ -1111,6 +1118,8 @@ struct ProfileMarker {
VTUNE_ONLY_CODE(std::unique_ptr<vtune::VTuneInstantMarker> vtune_instant_marker_);
};

static ProfileDomain custom_op_domain("Custom Operator");
Zha0q1 marked this conversation as resolved.
Show resolved Hide resolved

/*!
* \brief Operator profiler object. Logs as both an independent event and a task in
* the operator domain
Expand Down Expand Up @@ -1162,10 +1171,16 @@ struct ProfileOperator : public ProfileEvent {
: ProfileEvent(name)
, as_task_(name, &domain_)
, name_(name)
, attributes_(attributes) {
, attributes_(attributes)
, profiling_(!IsDeprecatedOperator(name)) {
if (IsSubOperatorOfCustom(name)) {
as_task_.setDomain(&custom_op_domain);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean use a tertiary operator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I will keep it as a separate line. The reason is that we want to set the domain and category in the same if else block

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case you are calling as_task_ twice with the second call overwrites the previous one. Using a ternary operator during class initialization seems cleaner to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think that because line 1165 and line 1166 are basically doing the same thing, except for different objects, we should not move 1165 up. Also, I think keeping the initialization list concise is also good

Copy link
Contributor

@apeforest apeforest Jun 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a good design style is actually keep the body in constructor simple. In fact, if you do

Suggested change
as_task_.setDomain(&custom_op_domain);
as_task_(name, IsSubOperatorOfCustom(name) ? &custom_op_domain : &domain_);

Then you don't even need a if/else branch for SetCategories. You can simply write:

Suggested change
as_task_.setDomain(&custom_op_domain);
SetCategories(domain_.name())

because the domain_ here is already customer_domain in your if case.

Copy link
Contributor Author

@Zha0q1 Zha0q1 Jun 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because the domain_ here is already customer_domain in your if case.

I don't think that's true. domain_ is defined as "ProfileDomain ProfileOperator::domain_("operator");" in profiler.cc, line 44

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

domain_ is a global variable, not a member variable of ProfileOperator

SetCategories(custom_op_domain.name());
Zha0q1 marked this conversation as resolved.
Show resolved Hide resolved
} else {
SetCategories(domain_.name());
}
// make as_task_ not to add stat to AggregateStats; otherwise we will add twice
as_task_.enableAggregateStats(false);
SetCategories(domain_.name());
}
/*!
* \brief Start the profiling scope
Expand All @@ -1175,15 +1190,19 @@ struct ProfileOperator : public ProfileEvent {
void start(mxnet::Context::DeviceType dev_type, uint32_t dev_id) {
dev_type_ = dev_type;
dev_id_ = dev_id;
ProfileEvent::start();
as_task_.start();
if (profiling_) {
ProfileEvent::start();
as_task_.start();
}
}
/*!
* \brief Stop the profiling scope
*/
void stop() override {
as_task_.stop();
ProfileEvent::stop();
if (profiling_) {
as_task_.stop();
ProfileEvent::stop();
}
}

/*!
Expand All @@ -1208,7 +1227,11 @@ struct ProfileOperator : public ProfileEvent {
if (attributes) {
name_.append(attributes->to_string().c_str());
}
categories_.set("operator");
if (IsSubOperatorOfCustom(name)) {
categories_.set(custom_op_domain.name());
} else {
categories_.set("operator");
}
items_[kStart].timestamp_ = start_time;
items_[kStop].timestamp_ = stop_time;
}
Expand All @@ -1228,6 +1251,20 @@ struct ProfileOperator : public ProfileEvent {
start_time_, ProfileStat::NowInMicrosec(),
attributes_.get());
}
/*!
* \brief Check if this operator is no longer profiled
* Notice that this operator may still be used for e.g synchronization
*/
inline static bool IsDeprecatedOperator(const char* name) {
return strcmp(name, "CustomOperatorWait") == 0 ||
strcmp(name, "Custom") == 0 || strcmp(name, "_backward_Custom") == 0;
}
/*!
* \brief Check if this operator a sub-operator of a custom operator
*/
inline static bool IsSubOperatorOfCustom(const char* name) {
return strstr(name, "::");
}
/*! \brief Also log the operator as a task in the operator domain */
ProfileTask as_task_;
/* !\brief Operator name */
Expand All @@ -1240,6 +1277,8 @@ struct ProfileOperator : public ProfileEvent {
static ProfileDomain domain_;
/*! \brief Optional operator attributes */
std::unique_ptr<Attributes> attributes_;
/*! \brief Whether to profile or not */
const bool profiling_;
};

/*
Expand Down
Loading