Skip to content

Commit

Permalink
Merge branch 'develop' into multiple-parents
Browse files Browse the repository at this point in the history
  • Loading branch information
khuck committed Sep 30, 2024
2 parents 7a8f36b + 8354e6d commit 054c191
Show file tree
Hide file tree
Showing 19 changed files with 1,438 additions and 33 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,10 @@ if(APEX_WITH_KOKKOS)
if(APEX_BUILD_TESTS)
# Just for testing
SET(Kokkos_LIBRARY kokkoscore)
set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE)
set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE)
set(Kokkos_ARCH_NATIVE ON CACHE BOOL "" FORCE)
set(Kokkos_ENABLE_TUNING ON CACHE BOOL "" FORCE)
add_subdirectory(kokkos)
endif(APEX_BUILD_TESTS)
endif()
Expand Down
4 changes: 4 additions & 0 deletions src/apex/CMakeLists_hpx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,11 @@ set(apex_headers
dependency_tree.hpp
event_listener.hpp
exhaustive.hpp
genetic_search.hpp
gzstream.hpp
handler.hpp
memory_wrapper.hpp
nelder_mead.hpp
policy_handler.hpp
profile.hpp
profiler.hpp
Expand Down Expand Up @@ -361,9 +363,11 @@ set(apex_sources
event_listener.cpp
event_filter.cpp
exhaustive.cpp
genetic_search.cpp
gzstream.cpp
handler.cpp
memory_wrapper.cpp
nelder_mead.cpp
nvtx_listener.cpp
policy_handler.cpp
profile_reducer.cpp
Expand Down
1 change: 1 addition & 0 deletions src/apex/CMakeLists_standalone.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ exhaustive.cpp
genetic_search.cpp
handler.cpp
memory_wrapper.cpp
nelder_mead.cpp
nvtx_listener.cpp
${OTF2_SOURCE}
${perfetto_sources}
Expand Down
2 changes: 1 addition & 1 deletion src/apex/apex_kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,11 @@ void kokkosp_allocate_data(SpaceHandle_t handle, const char* name,
APEX_UNUSED(ptr);
std::stringstream ss;
ss << "Kokkos " << handle.name << " data, " << name;
ss << ": Bytes";
std::string tmp2{ss.str()};
memory_mtx.lock();
memory_map().insert(std::pair<void*,std::string>(ptr, tmp2));
memory_mtx.unlock();
ss << ": Bytes";
double bytes = (double)(size);
if (apex::apex_options::use_kokkos_counters()) {
apex::sample_value(tmp2, bytes);
Expand Down
89 changes: 84 additions & 5 deletions src/apex/apex_kokkos_tuning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,14 @@ class KokkosSession {
} else if (strncmp(apex::apex_options::kokkos_tuning_policy(),
"genetic_search", strlen("genetic_search")) == 0) {
strategy = apex_ah_tuning_strategy::GENETIC_SEARCH;
} else if (strncmp(apex::apex_options::kokkos_tuning_policy(),
"nelder_mead", strlen("nelder_mead")) == 0) {
strategy = apex_ah_tuning_strategy::NELDER_MEAD_INTERNAL;
} else if (strncmp(apex::apex_options::kokkos_tuning_policy(),
"automatic", strlen("automatic")) == 0) {
strategy = apex_ah_tuning_strategy::AUTOMATIC;
} else {
strategy = apex_ah_tuning_strategy::NELDER_MEAD;
strategy = apex_ah_tuning_strategy::AUTOMATIC;
}
}
public:
Expand Down Expand Up @@ -354,6 +360,25 @@ void KokkosSession::saveOutputVar(size_t id, Variable * var) {
all_vars.insert(std::make_pair(id, var));
}

/**
 * Translate the strategy held by a tuning request into a short,
 * human-readable label.
 *
 * Only the five strategies listed below have a label; every other
 * enumerator yields the placeholder "unknown?".
 *
 * @param request tuning request whose strategy is queried; it is
 *                dereferenced unconditionally, so it must not be null.
 * @return the label for the request's strategy.
 */
std::string strategy_to_string(std::shared_ptr<apex_tuning_request> request) {
    switch (request->get_strategy()) {
        case apex_ah_tuning_strategy::APEX_RANDOM:
            return std::string("random");
        case apex_ah_tuning_strategy::APEX_EXHAUSTIVE:
            return std::string("exhaustive");
        case apex_ah_tuning_strategy::SIMULATED_ANNEALING:
            return std::string("simulated annealing");
        case apex_ah_tuning_strategy::GENETIC_SEARCH:
            return std::string("genetic search");
        case apex_ah_tuning_strategy::NELDER_MEAD_INTERNAL:
            return std::string("nelder mead");
        default:
            // no label defined for the remaining strategies
            return "unknown?";
    }
}

void KokkosSession::writeCache(void) {
if(use_history) { return; }
if(!saveCache) { return; }
Expand Down Expand Up @@ -385,6 +410,12 @@ void KokkosSession::writeCache(void) {
// always write the random search out
bool converged = request->has_converged() ||
strategy == apex_ah_tuning_strategy::APEX_RANDOM;
results << " Strategy: \"" <<
strategy_to_string(request);
if (strategy == apex_ah_tuning_strategy::AUTOMATIC) {
results << " (auto)";
}
results << "\"" << std::endl;
results << " Converged: " <<
(converged ? "true" : "false") << std::endl;
if (converged) {
Expand Down Expand Up @@ -498,6 +529,8 @@ void KokkosSession::parseContextCache(std::ifstream& results) {
std::getline(results, line);
std::string name = line.substr(line.find(delimiter)+2);
name.erase(std::remove(name.begin(),name.end(),'\"'),name.end());
// strategy
std::getline(results, line);
// converged?
std::getline(results, line);
std::string converged = line.substr(line.find(delimiter)+2);
Expand Down Expand Up @@ -965,8 +998,52 @@ bool handle_start(const std::string & name, const size_t vars,
};
request->set_metric(metric);

// Set apex_openmp_policy_tuning_strategy
request->set_strategy(session.strategy);
// Set apex tuning strategy
if (session.strategy == apex_ah_tuning_strategy::AUTOMATIC) {
// just one variable?
if (vars == 1) {
auto id = values[0].type_id;
Variable* var{session.outputs[id]};
// and it's a small set of candidate values?
if (var->info.valueQuantity == kokkos_value_set &&
var->info.candidates.set.size < 4) {
request->set_strategy(apex_ah_tuning_strategy::APEX_EXHAUSTIVE);
// if integer, use simulated annealing
} else if (var->info.type == kokkos_value_int64) {
request->set_strategy(apex_ah_tuning_strategy::SIMULATED_ANNEALING);
// if double, use nelder mead
} else if (var->info.type == kokkos_value_double) {
request->set_strategy(apex_ah_tuning_strategy::NELDER_MEAD_INTERNAL);
}
// more than one variable...
} else {
// are any of them categorical?
bool haveSet = false;
bool allDouble = true;
for (size_t i = 0 ; i < vars ; i++) {
auto id = values[i].type_id;
Variable* var{session.outputs[id]};
if (var->info.valueQuantity == kokkos_value_set) {
haveSet = true;
allDouble = false;
} else if (var->info.type == kokkos_value_int64) {
allDouble = false;
}
}
// if have a categorical set, use genetic search
if (haveSet) {
request->set_strategy(apex_ah_tuning_strategy::GENETIC_SEARCH);
// if all double values, use nelder mead
} else if (allDouble) {
request->set_strategy(apex_ah_tuning_strategy::NELDER_MEAD_INTERNAL);
// as default, use simulated annealing
} else {
request->set_strategy(apex_ah_tuning_strategy::SIMULATED_ANNEALING);
}
}
} else {
request->set_strategy(session.strategy);
}
request->set_radius(0.5);
request->set_aggregation_times(3);
// min, max, mean
Expand Down Expand Up @@ -1134,10 +1211,12 @@ void kokkosp_request_values(
Kokkos_Tools_VariableValue* tuningVariableValues) {
if (!apex::apex_options::use_kokkos_tuning()) { return; }
// first, get the current timer node in the task tree
auto tlt = apex::thread_instance::get_top_level_timer();
//auto tlt = apex::thread_instance::get_top_level_timer();
auto tlt = apex::thread_instance::get_current_profiler();
std::string tree_node{"default"};
if (tlt != nullptr) {
tree_node = tlt->tree_node->getName();
//tree_node = tlt->tt_ptr->tree_node->getName();
tree_node = tlt->tt_ptr->task_id->get_name();
}
// don't track memory in this function.
apex::in_apex prevent_memory_tracking;
Expand Down
107 changes: 107 additions & 0 deletions src/apex/apex_policies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,42 @@ int apex_sa_policy(shared_ptr<apex_tuning_session> tuning_session,
return APEX_NOERROR;
}

/**
 * Policy callback that advances the Nelder-Mead search of a tuning session
 * by one step.
 *
 * While the search has not converged (or convergence is not being honored,
 * see below) the current metric is measured, reported to the search, and
 * new settings are requested. Once converged, the best settings are saved
 * on every call and a one-time message is printed.
 *
 * @param tuning_session session that owns the Nelder-Mead search state.
 * @param context        policy context; when context.data is non-null it is
 *                       read as a pointer to a bool that says whether
 *                       convergence should be honored on this call.
 * @return always APEX_NOERROR.
 */
int apex_nelder_mead_policy(shared_ptr<apex_tuning_session> tuning_session,
    apex_context const context) {
    // Macro retained from the original.
    // NOTE(review): context *is* read below, so this looks like a leftover.
    APEX_UNUSED(context);
    // nothing to do once APEX has terminated
    if (apex_final) { return APEX_NOERROR; }
    std::unique_lock<std::mutex> shutdown_guard{shutdown_mutex};

    /* With nested search contexts, an outer context keeps searching until
     * all inner contexts have converged; the caller signals that through
     * the boolean behind context.data. Without data, convergence is
     * always honored. */
    const bool honor_convergence = (context.data == nullptr)
        ? true
        : *static_cast<const bool*>(context.data);

    auto & search = tuning_session->nelder_mead_session;

    if (search.converged() && honor_convergence) {
        // announce convergence only the first time we get here
        if (!tuning_session->converged_message) {
            tuning_session->converged_message = true;
            cout << "APEX: Tuning has converged for session " << tuning_session->id
                << "." << endl;
            search.saveBestSettings();
            search.printBestSettings();
        }
        search.saveBestSettings();
        return APEX_NOERROR;
    }

    /* Measure the current setting and report it to the search. */
    search.evaluate(tuning_session->metric_of_interest());

    /* Ask for the settings to try next time. */
    search.getNewSettings();

    return APEX_NOERROR;
}

int apex_genetic_policy(shared_ptr<apex_tuning_session> tuning_session,
apex_context const context) {
APEX_UNUSED(context);
Expand Down Expand Up @@ -1572,6 +1608,67 @@ inline int __sa_setup(shared_ptr<apex_tuning_session>
return APEX_NOERROR;
}

/**
 * Populate the Nelder-Mead search of a tuning session from a tuning request.
 *
 * Every parameter of the request becomes one search variable:
 *  - LONG / DOUBLE parameters are expanded to the candidate values
 *    min, min+step, ... that do not exceed max (min is always included);
 *  - ENUM parameters contribute each of their possible values.
 * After all variables are registered, the initial settings are requested
 * from the search.
 *
 * @param tuning_session session whose nelder_mead_session is configured.
 * @param request        request providing the parameters to tune.
 * @return APEX_NOERROR on success, APEX_ERROR if a parameter has an
 *         unknown type (an error message is written to cerr).
 */
inline int __nelder_mead_setup(shared_ptr<apex_tuning_session>
    tuning_session, apex_tuning_request & request) {
    // Macro retained from the original.
    // NOTE(review): tuning_session *is* used below, so this looks like a leftover.
    APEX_UNUSED(tuning_session);
    // set up the Nelder-Mead search!
    // iterate over the parameters, and create variables.
    using namespace apex::nelder_mead;
    for (auto & kv : request.params) {
        auto & param = kv.second;
        // Bind the name to a const reference first: this keeps the character
        // data alive for the whole iteration whether get_name() returns by
        // value (temporary lifetime is extended) or by reference.
        const std::string & name_ref = param->get_name();
        const char * param_name = name_ref.c_str();
        switch (param->get_type()) {
            case apex_param_type::LONG: {
                auto param_long =
                    std::static_pointer_cast<apex_param_long>(param);
                Variable v(VariableType::longtype, param_long->value.get());
                long lvalue = param_long->min;
                do {
                    v.lvalues.push_back(lvalue);
                    lvalue = lvalue + param_long->step;
                    // a non-positive step can never reach max; stop after
                    // the first value instead of looping forever
                } while (param_long->step > 0 && lvalue <= param_long->max);
                v.set_init();
                tuning_session->nelder_mead_session.add_var(param_name, std::move(v));
            }
            break;
            case apex_param_type::DOUBLE: {
                auto param_double =
                    std::static_pointer_cast<apex_param_double>(param);
                Variable v(VariableType::doubletype, param_double->value.get());
                double dvalue = param_double->min;
                do {
                    v.dvalues.push_back(dvalue);
                    const double next_value = dvalue + param_double->step;
                    // stop when the step makes no forward progress (zero,
                    // negative, NaN, or too small to change the value)
                    if (!(next_value > dvalue)) { break; }
                    dvalue = next_value;
                } while (dvalue <= param_double->max);
                v.set_init();
                tuning_session->nelder_mead_session.add_var(param_name, std::move(v));
            }
            break;
            case apex_param_type::ENUM: {
                auto param_enum =
                    std::static_pointer_cast<apex_param_enum>(param);
                Variable v(VariableType::stringtype, param_enum->value.get());
                for (const std::string & possible_value :
                        param_enum->possible_values) {
                    v.svalues.push_back(possible_value);
                }
                v.set_init();
                tuning_session->nelder_mead_session.add_var(param_name, std::move(v));
            }
            break;
            default:
                cerr <<
                    "ERROR: Attempted to register tuning parameter with unknown type."
                    << endl;
                return APEX_ERROR;
        }
    }
    /* request initial settings */
    tuning_session->nelder_mead_session.getNewSettings();

    return APEX_NOERROR;
}

inline int __genetic_setup(shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request) {
APEX_UNUSED(tuning_session);
Expand Down Expand Up @@ -1848,6 +1945,16 @@ inline int __common_setup_custom_tuning(shared_ptr<apex_tuning_session>
}
);
}
} else if (request.strategy == apex_ah_tuning_strategy::NELDER_MEAD_INTERNAL) {
status = __nelder_mead_setup(tuning_session, request);
if(status == APEX_NOERROR) {
apex::register_policy(
request.trigger,
[=](apex_context const & context)->int {
return apex_nelder_mead_policy(tuning_session, context);
}
);
}
} else if (request.strategy == apex_ah_tuning_strategy::GENETIC_SEARCH) {
status = __genetic_setup(tuning_session, request);
if(status == APEX_NOERROR) {
Expand Down
19 changes: 16 additions & 3 deletions src/apex/apex_policies.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@
#include "random.hpp"
// include the genetic_search class
#include "genetic_search.hpp"
// include the nelder_mead class
#include "nelder_mead.hpp"

enum class apex_param_type : int {NONE, LONG, DOUBLE, ENUM};
enum class apex_ah_tuning_strategy : int {
EXHAUSTIVE, RANDOM, NELDER_MEAD,
EXHAUSTIVE, RANDOM, NELDER_MEAD, NELDER_MEAD_INTERNAL,
PARALLEL_RANK_ORDER, SIMULATED_ANNEALING,
APEX_EXHAUSTIVE, APEX_RANDOM,
GENETIC_SEARCH};
APEX_EXHAUSTIVE, APEX_RANDOM, GENETIC_SEARCH, AUTOMATIC};

struct apex_tuning_session;
class apex_tuning_request;
Expand Down Expand Up @@ -82,6 +83,8 @@ class apex_param {
tuning_session, apex_tuning_request & request);
friend int __genetic_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
friend int __nelder_mead_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
};

class apex_param_long : public apex_param {
Expand Down Expand Up @@ -121,6 +124,8 @@ class apex_param_long : public apex_param {
tuning_session, apex_tuning_request & request);
friend int __genetic_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
friend int __nelder_mead_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
};

class apex_param_double : public apex_param {
Expand Down Expand Up @@ -160,6 +165,8 @@ class apex_param_double : public apex_param {
tuning_session, apex_tuning_request & request);
friend int __genetic_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
friend int __nelder_mead_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
};

class apex_param_enum : public apex_param {
Expand Down Expand Up @@ -198,6 +205,8 @@ class apex_param_enum : public apex_param {
tuning_session, apex_tuning_request & request);
friend int __genetic_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
friend int __nelder_mead_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
};


Expand Down Expand Up @@ -340,6 +349,8 @@ class apex_tuning_request {
tuning_session, apex_tuning_request & request);
friend int __genetic_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
friend int __nelder_mead_setup(std::shared_ptr<apex_tuning_session>
tuning_session, apex_tuning_request & request);
};


Expand Down Expand Up @@ -368,6 +379,8 @@ struct apex_tuning_session {
apex::random::Random random_session;
// if using genetic, this is the request.
apex::genetic::GeneticSearch genetic_session;
// if using nelder mead, this is the request.
apex::nelder_mead::NelderMead nelder_mead_session;
bool converged_message = false;

// variables related to power throttling
Expand Down
2 changes: 1 addition & 1 deletion src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ inline unsigned int sc_nprocessors_onln(void)
APEX_DEFAULT_OTF2_ARCHIVE_NAME, "OTF2 trace filename.") \
macro (APEX_EVENT_FILTER_FILE, task_event_filter_file, char*, "", "File containing names of timers to include/exclude during data collection.") \
macro (APEX_KOKKOS_TUNING_CACHE, kokkos_tuning_cache, char*, "", "Filename contining Kokkos autotuned results, tuned offline.") \
macro (APEX_KOKKOS_TUNING_POLICY, kokkos_tuning_policy, char*, "simulated_annealing", "Kokkos autotuning policy: random, exhaustive, simulated_annealing, nelder_mead.") \
macro (APEX_KOKKOS_TUNING_POLICY, kokkos_tuning_policy, char*, "simulated_annealing", "Kokkos autotuning policy: random, exhaustive, simulated_annealing, nelder_mead, automatic.") \
macro (APEX_ROCPROF_METRICS, rocprof_metrics, char*, "", "List of metrics to periodically sample with the Rocprofiler library (see /opt/rocm/rocprofiler/lib/metrics.xml).") \
macro (APEX_NVTX_LIBRARY, nvtx_library, char*, "libnvToolsExt.so", "With NVTX listener, specify the location of libnvToolsExt.so.")
// macro (APEX_ROCPROF_METRICS, rocprof_metrics, char*, "MemUnitBusy,MemUnitStalled,VALUUtilization,VALUBusy,SALUBusy,L2CacheHit,WriteUnitStalled,ALUStalledByLDS,LDSBankConflict", "")
Expand Down
Loading

0 comments on commit 054c191

Please sign in to comment.