Skip to content

Commit

Permalink
A few updates to help with periodic sampling of ROCM metrics on Front…
Browse files Browse the repository at this point in the history
…ier.
  • Loading branch information
khuck committed Apr 5, 2024
1 parent b95e0db commit 25448d4
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 46 deletions.
1 change: 1 addition & 0 deletions src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,7 @@ uint64_t init(const char * thread_name, uint64_t comm_rank,
if (comm_rank == 0) {
printf("%s", apex_banner);
printf("APEX Version: %s\n", instance->version_string.c_str());
printf("Executing command line: %s\n", proc_data_reader::get_command_line().c_str());
}
FUNCTION_EXIT
return APEX_NOERROR;
Expand Down
20 changes: 19 additions & 1 deletion src/apex/apex_mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,25 @@ bool amIroot(MPI_Comm comm, int root) {
* implementation of Finalize, and do what we need to. */
#if defined(APEX_WITH_MPI) && !defined(HPX_HAVE_NETWORKING)
int MPI_Init(int *argc, char ***argv) {
int retval = PMPI_Init(argc, argv);
int retval = 0;
/* If the caller passed NULL for argc and/or argv, rebuild an argument
   vector from the process command line so that PMPI_Init still receives
   the real arguments. */
if (argc == NULL || argv == NULL) {
    /* Upper bound on forwarded arguments; further tokens are dropped
       instead of being written past the end of the array. */
    constexpr int max_args = 256;
    int _argc = 0;
    /* One extra zero-initialized slot keeps the vector NULL-terminated. */
    char* _argv[max_args + 1] = {};
    /* An MPI implementation may alter argc/argv during initialization, so
       every allocation is also recorded here and freed from this list. */
    char* owned[max_args] = {};
    std::stringstream ss(apex::proc_data_reader::get_command_line());
    std::string token;
    while (_argc < max_args && getline(ss, token, ' ')) {
        owned[_argc] = strdup(token.c_str());
        _argv[_argc] = owned[_argc];
        _argc++;
    }
    const int owned_count = _argc;
    /* PMPI_Init needs the address of a char**.  The address of the array
       itself has type char*(*)[N]; casting that to char*** makes the
       callee read the first argument string as if it were a char**. */
    char** _argv_ptr = _argv;
    retval = PMPI_Init(&_argc, &_argv_ptr);
    for (int i = 0; i < owned_count; i++) {
        free(owned[i]);
    }
} else {
    retval = PMPI_Init(argc, argv);
}
int rank{0};
int size{0};
PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
Expand Down
12 changes: 6 additions & 6 deletions src/apex/hip_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "util/hsa_rsrc_factory.h"
#include "util/test_assert.h"

#define ROCPROFILER_CALL(call) \
#define APEX_ROCPROFILER_CALL(call) \
do { \
hsa_status_t _status = call; \
if (_status != HSA_STATUS_SUCCESS) { \
Expand All @@ -31,7 +31,7 @@ do { \
} \
} while (0);

#define ROCPROFILER_CALL_NOEXIT(call) \
#define APEX_ROCPROFILER_CALL_NOEXIT(call) \
do { \
hsa_status_t _status = call; \
if (_status != HSA_STATUS_SUCCESS) { \
Expand Down Expand Up @@ -111,10 +111,10 @@ monitor::~monitor (void) {
}
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
std::cout << "close..." << std::endl;
//status = rocprofiler_close(context);
//TEST_STATUS(status == HSA_STATUS_SUCCESS);
std::cout << "done." << std::endl;
//std::cout << "close..." << std::endl;
status = rocprofiler_close(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
//std::cout << "done." << std::endl;
}

// print profiler features
Expand Down
4 changes: 4 additions & 0 deletions src/apex/proc_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,7 @@ namespace apex {
}
// release the main thread to continue
while(!done /*&& ptw->wait()*/) {
incrementPeriod();
//usleep(apex_options::proc_period());
std::unique_lock<std::mutex> lk(proc_data_reader::cv_m);
// if we've been interrupted by the main thread, break and exit
Expand Down Expand Up @@ -959,6 +960,9 @@ namespace apex {
return line;
}

std::atomic<uint64_t> proc_data_reader::sample_period{0};


std::array<double,2> getAvailableMemory() {
std::array<double,2> values{0,0};
/* Get the CPU memory */
Expand Down
6 changes: 6 additions & 0 deletions src/apex/proc_read.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#if APEX_HAVE_PROC

#include <stdio.h>
#include <unistd.h>
#include <vector>
#include <iostream>
#include <fstream>
Expand Down Expand Up @@ -50,6 +51,7 @@ class proc_data_reader {
std::thread worker_thread;
std::condition_variable cv;
std::mutex cv_m;
static std::atomic<uint64_t> sample_period;
public:
/*
static void* read_proc(void * _pdr);
Expand All @@ -71,13 +73,17 @@ class proc_data_reader {
if (worker_thread.joinable()) {
worker_thread.join();
}
// this is helpful if we are sampling frequently
usleep(apex_options::proc_period());
}

// Destructor: all shutdown work is delegated to stop_reading().
~proc_data_reader() {
    stop_reading();
}
static std::string get_command_line(void);
// Returns the current value of the class-wide sample_period counter.
static uint64_t getPeriod() { return sample_period.load(); }
// Advances the class-wide sample_period counter by one.
static void incrementPeriod() { sample_period.fetch_add(1); }
};

class ProcData {
Expand Down
4 changes: 3 additions & 1 deletion src/apex/profiler_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ std::unordered_set<profile*> free_profiles;
thread_instance::instance(false);
std::unique_lock<std::mutex> task_map_lock(_mtx);
counter_scatterplot_samples << std::fixed
<< std::setprecision(0) << p.normalized_timestamp()
<< std::setprecision(0) << proc_data_reader::getPeriod()
<< " " << p.normalized_timestamp()
<< " " << std::setprecision(6) << p.elapsed() << " "
<< "'" << p.get_task_id()->get_name() << "'" << endl;
int loc0 = task_scatterplot_samples.tellp();
Expand Down Expand Up @@ -721,6 +722,7 @@ std::unordered_set<profile*> free_profiles;
// want to write it out
stringstream screen_output;
// iterate over the profiles in the address map
screen_output << endl << "Command line: " << proc_data_reader::get_command_line();
screen_output << endl << "Start Date/Time: " << timestamp_started;
screen_output << endl << "Elapsed time: " << wall_clock_main
<< " seconds" << endl;
Expand Down
4 changes: 2 additions & 2 deletions src/apex/profiler_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class profiler_listener : public event_listener {
ss << filesystem_separator();
ss << task_scatterplot_sample_filename << node_id << ".csv";
// open the file
_task_scatterplot_sample_file.open(ss.str(), std::ofstream::out);
_task_scatterplot_sample_file.open(ss.str(), std::ofstream::out | std::ofstream::app);
if (!_task_scatterplot_sample_file.is_open()) {
perror("opening scatterplot sample file");
}
Expand All @@ -214,7 +214,7 @@ class profiler_listener : public event_listener {
ss << filesystem_separator();
ss << counter_scatterplot_sample_filename << node_id << ".csv";
// open the file
_counter_scatterplot_sample_file.open(ss.str(), std::ofstream::out);
_counter_scatterplot_sample_file.open(ss.str(), std::ofstream::out | std::ofstream::app);
if (!_counter_scatterplot_sample_file.is_open()) {
perror("opening scatterplot sample file");
}
Expand Down
11 changes: 10 additions & 1 deletion src/apex/trace_event_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,17 @@ inline void trace_event_listener::_common_stop(std::shared_ptr<profiler> &p) {
int i = 0;
for (auto metric :
apex::instance()->the_profiler_listener->get_metric_names()) {
//double start = p->papi_start_values[i];
double stop = p->papi_stop_values[i++];
/* this would be a good idea, but Perfetto allows us to visualize
as a delta or a rate, so not needed. It also confuses things for
nested timers, so for now, just allow monotonically increasing
counters to increase. */
/*
double start = p->papi_start_values[i];
if (!p->tt_ptr->explicit_trace_start) {
stop = stop - start;
}
*/
// write our counter into the event stream
ss << fixed;
ss << "{\"name\":\"" << metric
Expand Down
6 changes: 6 additions & 0 deletions src/scripts/apex-treesummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def print(self, depth, total, maxranks):
total = acc_mean
acc_percent = (acc_mean / total) * 100.0
acc_minimum = self.df[metric].min() # get min
acc_minimum_rank = self.df.loc[self.df[metric].idxmin()]['process rank']
acc_maximum = self.df[metric].max() # get max
acc_maximum_rank = self.df.loc[self.df[metric].idxmax()]['process rank']
acc_median = self.df[metric].median() # get median
acc_threads = self.df['threads'].sum() # get sum
acc_calls = self.df['calls'].mean() # get sum
acc_mean_per_call = acc_mean / acc_calls
Expand All @@ -116,7 +119,10 @@ def print(self, depth, total, maxranks):
# Append the summary for this node: percent of total, call count, then
# min and max (each followed in parentheses by the process rank that
# produced it), median, mean per call and thread count.
tmpstr = tmpstr + ' - ' + '%.3f' % acc_percent
tmpstr = tmpstr + '% [' + str(int(acc_calls))
tmpstr = tmpstr + '] {min=' + '%.3f' % acc_minimum
# Show the rank that holds the minimum, not the minimum value again.
tmpstr = tmpstr + ' (' + '%d' % acc_minimum_rank + ')'
tmpstr = tmpstr + ', max=' + '%.3f' % acc_maximum
# Show the rank that holds the maximum, not the maximum value again.
tmpstr = tmpstr + ' (' + '%d' % acc_maximum_rank + ')'
tmpstr = tmpstr + ', median=' + '%.3f' % acc_median
tmpstr = tmpstr + ', mean=' + '%.3f' % acc_mean_per_call
tmpstr = tmpstr + ', threads=' + str(int(acc_threads))
tmpstr = tmpstr + '} ' + self.name + '\n'
Expand Down
85 changes: 50 additions & 35 deletions src/scripts/apex_exec
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,7 @@ while (( "$#" )); do
--apex:hip_metrics|--apex:hip-metrics)
hip=yes
hip_metrics=yes
export APEX_ENABLE_HIP=1
export APEX_HIP_PROFILER=1
#export APEX_ENABLE_HIP=1
export APEX_HIP_PROFILER=1
export ROCPROFILER_LOG=1
export HSA_VEN_AMD_AQLPROFILE_LOG=1
Expand Down Expand Up @@ -651,26 +650,17 @@ fi
echo_screen "APEX_LD_LIBRARY_PATH: ${APEX_LD_LIBRARY_PATH}"
echo_screen "APEX_LD_PRELOAD: ${APEX_LD_PRELOAD}"

if [ $debug = yes ] ; then
if [ "${myrank}" == "0" ] ; then
rm -f ./.gdbcmds
fi
fi
printf -v myrank_padded "%04g" $myrank
gdbcmds=/tmp/gdbcmds.${myrank}
rm -f ${gdbcmds}

if [ $apple = 1 ]; then
if [ $debug = yes ] ; then
if [ "${myrank}" == "0" ] ; then
echo "_regexp-env DYLD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH}" >> ./.gdbcmds
echo "_regexp-env DYLD_INSERT_LIBRARIES=${APEX_LD_PRELOAD}" >> ./.gdbcmds
echo "_regexp-env DYLD_FORCE_FLAT_NAMESPACE=1" >> ./.gdbcmds
echo "_regexp-env DYLD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH}" >> ${gdbcmds}
echo "_regexp-env DYLD_INSERT_LIBRARIES=${APEX_LD_PRELOAD}" >> ${gdbcmds}
echo "_regexp-env DYLD_FORCE_FLAT_NAMESPACE=1" >> ${gdbcmds}
if [ $# -gt 1 ] ; then
echo "settings set target.run-args ${*:2}" >> .gdb_commands
fi
else
# Give rank 0 enough time to write the .gdbcmds file
until [ -f ./.gdbcmds ] ; do
sleep 1
done
echo "settings set target.run-args ${*:2}" >> ${gdbcmds}
fi
# Source the per-rank command file (${gdbcmds}) that the lines above write to;
# the old shared ./.gdbcmds file is no longer produced.
debugger="lldb -s ${gdbcmds} --"
else
Expand All @@ -681,26 +671,44 @@ if [ $apple = 1 ]; then
else
if [ $debug = yes ] ; then
gdbargs=""
if [ "${myrank}" == "0" ] ; then
echo "set env LD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH}" >> ./.gdbcmds
echo "set env LD_PRELOAD=${APEX_LD_PRELOAD}" >> ./.gdbcmds
echo "set env LD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH}" >> ${gdbcmds}
echo "set env LD_PRELOAD=${APEX_LD_PRELOAD}" >> ${gdbcmds}
if [ $mpi = yes ]; then
echo "run" >> ./.gdbcmds
echo "bt" >> ./.gdbcmds
echo "quit" >> ./.gdbcmds
gdbargs="-batch -q"
fi
else
# Give rank 0 enough time to write the .gdbcmds file
until [ -f ./.gdbcmds ] ; do
sleep 1
done
# Set up logging
echo "set pagination off" >> ${gdbcmds}
echo "set height 0" >> ${gdbcmds}
echo "set logging overwrite on" >> ${gdbcmds}
echo "set logging redirect on" >> ${gdbcmds}
echo "set logging file zsgdb.${myrank_padded}.log" >> ${gdbcmds}
echo "set logging enabled on" >> ${gdbcmds}

# Run the executable
echo "run" >> ${gdbcmds}

# If non-zero exit, do the following:
echo "echo \n\nBacktrace:\n\n" >> ${gdbcmds}
echo "backtrace" >> ${gdbcmds}
echo "echo \n\nMain thread Backtrace:\n\n" >> ${gdbcmds}
echo "thread 1" >> ${gdbcmds}
echo "backtrace" >> ${gdbcmds}
#echo "echo \n\nRegisters:\n\n" >> ${gdbcmds}
#echo "info registers" >> ${gdbcmds}
#echo "echo \n\nCurrent instructions:\n\n" >> ${gdbcmds}
#echo "x/16i \$pc" >> ${gdbcmds}
echo "echo \n\nThreads:\n\n" >> ${gdbcmds}
echo "info threads" >> ${gdbcmds}
echo "echo \n\nThread Backtrace:\n\n" >> ${gdbcmds}
echo "thread apply all bt" >> ${gdbcmds}

# exit gdb
echo "quit" >> ${gdbcmds}
gdbargs="-batch -q"
fi
#echo "set env LD_AUDIT=${APEX_LD_AUDIT}" >> ./.gdbcmds
if [ $hip = yes ] ; then
debugger="rocgdb ${gdbargs} -x ./.gdbcmds --args"
debugger="rocgdb -x ${gdbcmds} ${gdbargs} --args"
else
debugger="gdb ${gdbargs} -x ./.gdbcmds --args"
debugger="gdb -x ${gdbcmds} ${gdbargs} --args"
fi
else
export LD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH}
Expand Down Expand Up @@ -746,6 +754,12 @@ env | while IFS= read -r line; do
echo "$value: $name"
fi
done
# For now, only collect ROCM metrics on device 0. Any other device fails.
if [[ ${APEX_PAPI_METRICS} == *":device="* ]]; then
if [ "${ROCR_VISIBLE_DEVICES}" != "0" ] ; then
export APEX_PAPI_METRICS=""
fi
fi
export LD_PRELOAD=${APEX_SAVE_LD_PRELOAD}

delim=";"
Expand All @@ -771,7 +785,7 @@ if [[ "$long_metrics" == *"$delim"* ]]; then
export APEX_PAPI_METRICS=${t}
export APEX_OUTPUT_FILE_PATH="${base_output_path}/METRIC_GROUP_${index}"
mkdir -p ${APEX_OUTPUT_FILE_PATH}
echo "Executing with metrics: \"${t}\", writing to ${APEX_OUTPUT_FILE_PATH}"
#echo "${myrank} Executing with metrics: ${APEX_PAPI_METRICS}, writing to ${APEX_OUTPUT_FILE_PATH}"
# Restore our preload settings
export LD_PRELOAD=${APEX_LD_PRELOAD}
${PARAMS}
Expand All @@ -784,12 +798,13 @@ if [[ "$long_metrics" == *"$delim"* ]]; then
done
unset IFS
else
#echo "${myrank} Executing with metrics: \"${APEX_PAPI_METRICS}\", writing to ${APEX_OUTPUT_FILE_PATH}"
${debugger} ${PARAMS}
retval=$?
unset LD_PRELOAD
unset DYLD_INSERT_LIBRARIES
if [ "${myrank}" == "0" ] ; then
rm -f ./.gdbcmds
rm -f ${gdbcmds}
fi
if [ ${retval} != 0 ] ; then
echo "Error ${retval}!"
Expand Down

0 comments on commit 25448d4

Please sign in to comment.