Skip to content

Commit

Permalink
Merge branch 'develop' of git.nic.uoregon.edu:/gitroot/xpress-apex in…
Browse files Browse the repository at this point in the history
…to develop
  • Loading branch information
khuck committed Mar 13, 2024
2 parents b148ee9 + d656d07 commit 475515e
Show file tree
Hide file tree
Showing 11 changed files with 178 additions and 240 deletions.
2 changes: 1 addition & 1 deletion kokkos
Submodule kokkos updated 512 files
4 changes: 3 additions & 1 deletion src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ inline std::shared_ptr<task_wrapper> _new_task(
task_identifier * id,
const uint64_t task_id,
const std::shared_ptr<task_wrapper> parent_task, apex* instance) {
in_apex prevent_deadlocks;
APEX_UNUSED(instance);
std::shared_ptr<task_wrapper> tt_ptr = make_shared<task_wrapper>();
tt_ptr->task_id = id;
Expand Down Expand Up @@ -1674,7 +1675,7 @@ void finalize_plugins(void) {

std::string dump(bool reset, bool finalizing) {
in_apex prevent_deadlocks;
static size_t index{0};
static int index{0};
// if APEX is disabled, do nothing.
if (apex_options::disable() == true ||
(!finalizing && apex_options::use_final_output_only()))
Expand Down Expand Up @@ -1702,6 +1703,7 @@ std::string dump(bool reset, bool finalizing) {
controlMemoryWrapper(true);
}
if (_notify_listeners) {
//apex_get_leak_symbols();
dump_event_data data(instance->get_node_id(),
thread_instance::get_id(), reset);
for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) {
Expand Down
39 changes: 24 additions & 15 deletions src/apex/apex_kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,12 @@
#include "apex.hpp"
#include "Kokkos_Profiling_C_Interface.h"

/*
static std::mutex memory_mtx;
static std::unordered_map<void*,apex::profiler*>& memory_map() {
static std::unordered_map<void*,apex::profiler*> themap;
static std::unordered_map<void*,std::string>& memory_map() {
static std::unordered_map<void*,std::string> themap;
return themap;
}
*/

static std::stack<apex::profiler*>& timer_stack() {
static APEX_NATIVE_TLS std::stack<apex::profiler*> thestack;
return thestack;
Expand Down Expand Up @@ -147,9 +146,21 @@ void kokkosp_init_library(int loadseq, uint64_t version,
* profiling hooks.
*/
void kokkosp_finalize_library() {
    // Report every Kokkos allocation that is still registered in
    // memory_map() when the tool is finalized. An empty map is announced
    // on stdout by node 0 only; each remaining entry is written to stderr
    // together with the node id.
    {
        // Scoped guard instead of manual lock()/unlock(): the mutex is
        // released on every exit path out of this block, including a
        // throwing stream operation.
        std::lock_guard<std::mutex> guard(memory_mtx);
        const auto& outstanding = memory_map();
        if (outstanding.empty()) {
            if (apex::apex::instance()->get_node_id() == 0) {
                std::cout << "No Kokkos allocation Leaks on rank 0!" << std::endl;
            }
        } else {
            // Iterate by const reference; iterating by value copies each
            // (pointer, payload) pair of the map.
            for (const auto& entry : outstanding) {
                std::cerr << "Rank: " << apex::apex::instance()->get_node_id()
                    << ", Kokkos allocation Leak: " << entry.second << std::endl;
            }
        }
    }
#ifndef APEX_HAVE_HPX
    if (!apex::apex_options::use_mpi()) {
        // NOTE(review): a live call and a commented-out copy of the same
        // call appear back to back here; both are preserved as found.
        // Confirm which one is intended.
        apex::finalize();
        //apex::finalize();
    }
#endif
}
Expand All @@ -174,6 +185,7 @@ void kokkosp_request_tool_settings(int num_actions,
*/
void kokkosp_begin_parallel_for(const char* name,
uint32_t devid, uint64_t* kernid) {
apex::in_apex prevent_memory_tracking;
std::stringstream ss;
ExecutionSpaceIdentifier space_id = identifier_from_devid(devid);
ss << "Kokkos::parallel_for ["
Expand All @@ -195,6 +207,7 @@ void kokkosp_begin_parallel_for(const char* name,

void kokkosp_begin_parallel_reduce(const char* name,
uint32_t devid, uint64_t* kernid) {
apex::in_apex prevent_memory_tracking;
std::stringstream ss;
ExecutionSpaceIdentifier space_id = identifier_from_devid(devid);
ss << "Kokkos::parallel_reduce ["
Expand All @@ -216,6 +229,7 @@ void kokkosp_begin_parallel_reduce(const char* name,

void kokkosp_begin_parallel_scan(const char* name,
uint32_t devid, uint64_t* kernid) {
apex::in_apex prevent_memory_tracking;
std::stringstream ss;
ExecutionSpaceIdentifier space_id = identifier_from_devid(devid);
ss << "Kokkos::parallel_scan ["
Expand Down Expand Up @@ -263,6 +277,7 @@ void kokkosp_end_parallel_scan(uint64_t kernid) {
* user.
*/
void kokkosp_push_profile_region(const char* name) {
apex::in_apex prevent_memory_tracking;
std::stringstream ss;
ss << "Kokkos region, " << name;
std::string tmp{ss.str()};
Expand Down Expand Up @@ -292,18 +307,15 @@ void kokkosp_pop_profile_region() {
*/
void kokkosp_allocate_data(SpaceHandle_t handle, const char* name,
void* ptr, uint64_t size) {
apex::in_apex prevent_memory_tracking;
APEX_UNUSED(ptr);
std::stringstream ss;
ss << "Kokkos " << handle.name << " data, " << name;
/*
std::string tmp{ss.str()};
auto p = apex::start(tmp);
std::string tmp2{ss.str()};
memory_mtx.lock();
memory_map().insert(std::pair<void*,apex::profiler*>(ptr, p));
memory_map().insert(std::pair<void*,std::string>(ptr, tmp2));
memory_mtx.unlock();
*/
ss << ": Bytes";
std::string tmp2{ss.str()};
double bytes = (double)(size);
if (apex::apex_options::use_kokkos_counters()) {
apex::sample_value(tmp2, bytes);
Expand All @@ -321,13 +333,9 @@ void kokkosp_deallocate_data(SpaceHandle handle, const char* name,
APEX_UNUSED(name);
APEX_UNUSED(ptr);
APEX_UNUSED(size);
/*
memory_mtx.lock();
auto p = memory_map()[ptr];
memory_map().erase(ptr);
memory_mtx.unlock();
apex::stop(p);
*/
}

/* This function will be called whenever a Kokkos::deep_copy function is
Expand All @@ -342,6 +350,7 @@ void kokkosp_begin_deep_copy(
SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
SpaceHandle src_handle, const char* src_name, const void* src_ptr,
uint64_t size) {
apex::in_apex prevent_memory_tracking;
std::stringstream ss;
ss << "Kokkos deep copy: " << src_handle.name << " " << src_name
<< " -> " << dst_handle.name << " " << dst_name;
Expand Down
3 changes: 2 additions & 1 deletion src/apex/apex_preload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ int apex_preload_main(int argc, char** argv, char** envp) {
ret = main_real(argc, argv, envp);
} else {
apex::init("APEX Preload", 0, 1);
auto t = apex::new_task(__APEX_FUNCTION__);
const std::string timerName{__APEX_FUNCTION__};
auto t = apex::new_task(timerName);
apex::start(t);
ret = main_real(argc, argv, envp);
apex::stop(t);
Expand Down
4 changes: 2 additions & 2 deletions src/apex/hip_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data,
hostTotalAllocated.fetch_add(bytes, std::memory_order_relaxed);
value = (double)(hostTotalAllocated);
store_sync_counter_data(nullptr, "Total Bytes Occupied on Host", value, false);
apex::recordAlloc(bytes, ptr, apex::GPU_HOST_MALLOC);
apex::recordAlloc(bytes, ptr, APEX_GPU_HOST_MALLOC);
return true;
} else {
if (managed) {
Expand All @@ -587,7 +587,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data,
totalAllocated.fetch_add(bytes, std::memory_order_relaxed);
value = (double)(totalAllocated);
store_sync_counter_data(nullptr, "Total Bytes Occupied on Device", value, false);
apex::recordAlloc(bytes, ptr, apex::GPU_DEVICE_MALLOC, false);
apex::recordAlloc(bytes, ptr, APEX_GPU_DEVICE_MALLOC, false);
}
// how much memory does SMI think we have?
apex::rsmi::monitor::instance().explicitMemCheck();
Expand Down
60 changes: 48 additions & 12 deletions src/apex/memory_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
namespace apex {

static const char * allocator_strings[] = {
"malloc", "calloc", "realloc", "gpu_host_malloc", "gpu_device_malloc"
"malloc", "calloc", "realloc", "gpu_host_malloc", "gpu_device_malloc", "free"
};

book_t& getBook() {
Expand Down Expand Up @@ -98,10 +98,10 @@ void disable_memory_wrapper() {
}

void printBacktrace() {
void *trace[32];
void *trace[64];
size_t size, i;
char **strings;
size = backtrace( trace, 32 );
size = backtrace( trace, 64 );
strings = backtrace_symbols( trace, size );
std::cerr << std::endl;
// skip the first frame, it is this handler
Expand All @@ -110,7 +110,8 @@ void printBacktrace() {
}
}

void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) {
void recordAlloc(const size_t bytes, const void* ptr,
const apex_allocator_t alloc, const bool cpu) {
if (!recording()) return;
static book_t& book = getBook();
double value = (double)(bytes);
Expand All @@ -123,7 +124,7 @@ void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) {
tmp.size = backtrace(tmp.backtrace.data(), tmp.backtrace.size());
book.mapMutex.lock();
//book.memoryMap[ptr] = value;
book.memoryMap.insert(std::pair<void*,record_t>(ptr, tmp));
book.memoryMap.insert(std::pair<const void*,record_t>(ptr, tmp));
book.mapMutex.unlock();
book.totalAllocated.fetch_add(bytes, std::memory_order_relaxed);
if (p == nullptr) {
Expand All @@ -140,7 +141,7 @@ void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) {
if (cpu) sample_value("Memory: Total Bytes Occupied", value);
}

void recordFree(void* ptr, bool cpu) {
void recordFree(const void* ptr, const bool cpu) {
if (!recording()) return;
static book_t& book = getBook();
size_t bytes;
Expand Down Expand Up @@ -184,8 +185,8 @@ void recordMetric(std::string name, double value) {
}

// Comparator function to sort pairs descending, according to second value
bool cmp(std::pair<void*, record_t>& a,
std::pair<void*, record_t>& b)
bool cmp(std::pair<const void*, record_t>& a,
std::pair<const void*, record_t>& b)
{
return a.second.bytes > b.second.bytes;
}
Expand All @@ -197,6 +198,22 @@ bool cmp2(std::pair<std::string, size_t>& a,
return a.second > b.second;
}

void apex_get_leak_symbols() {
in_apex prevent_memory_tracking;
if (!apex_options::track_cpu_memory()) { return; }
if (!recording()) return;
static book_t& book = getBook();
for (auto& it : book.memoryMap) {
for(size_t i = 0; i < it.second.size; i++ ){
std::string* tmp2{lookup_address(((uintptr_t)it.second.backtrace[i]), true)};
it.second.symbols[i] = *tmp2;
//delete tmp2;
}
it.second.resolved = true;
}

}

void apex_report_leaks() {
if (!apex_options::track_gpu_memory() && !apex_options::track_cpu_memory()) {
return;
Expand All @@ -211,7 +228,7 @@ void apex_report_leaks() {
std::string outfile{ss.str()};
std::ofstream report (outfile);
// Declare vector of pairs
std::vector<std::pair<void*, record_t> > sorted;
std::vector<std::pair<const void*, record_t> > sorted;

if (book.saved_node_id == 0) {
std::cout << "APEX Memory Report: (see " << outfile << ")" << std::endl;
Expand All @@ -238,6 +255,7 @@ void apex_report_leaks() {
}
size_t actual_leaks{0};
// Print the sorted value
size_t actual_bytes{0};
for (auto& it : sorted) {
std::stringstream ss;
//if (it.second.bytes > 1000) {
Expand Down Expand Up @@ -266,13 +284,27 @@ void apex_report_leaks() {
if (tmp.find("pthread_once", 0) != std::string::npos) { skip = true; break; }
if (tmp.find("atexit", 0) != std::string::npos) { skip = true; break; }
if (tmp.find("apex_pthread_function", 0) != std::string::npos) { skip = true; break; }
if (tmp.find("hipFuncGetAttributes", 0) != std::string::npos) { skip = true; break; }
if (nameless) {
if (tmp.find("libcuda", 0) != std::string::npos) { skip = true; break; }
if (tmp.find("GOMP_parallel", 0) != std::string::npos) { skip = true; break; }
}
}
std::string* tmp2{lookup_address(((uintptr_t)it.second.backtrace[i]), true)};
ss << "\t" << *tmp2 << std::endl;
const std::string unknown{"{(unknown)}"};
if (it.second.resolved) {
if (it.second.symbols[i].find(unknown) == std::string::npos) {
ss << "\t" << it.second.symbols[i] << std::endl;
} else {
ss << "\t" << tmp << std::endl;
}
} else {
std::string* tmp2{lookup_address(((uintptr_t)it.second.backtrace[i]), true)};
if (tmp2->find(unknown) == std::string::npos) {
ss << "\t" << *tmp2 << std::endl;
} else {
ss << "\t" << tmp << std::endl;
}
}
}
if (skip) { continue; }

Expand All @@ -295,10 +327,14 @@ void apex_report_leaks() {
*/
report << ss.str();
actual_leaks++;
actual_bytes+=it.second.bytes;
}
report.close();
if (book.saved_node_id == 0) {
std::cout << "Reported " << actual_leaks << " 'actual' leaks.\nExpect false positives if memory was freed after exit." << std::endl;
std::cout << "Reported " << actual_leaks << " 'actual' leaks of "
<< actual_bytes
<< " bytes.\nExpect false positives if memory was freed after exit."
<< std::endl;
}
if (actual_leaks == 0) {
remove(outfile.c_str());
Expand Down
44 changes: 21 additions & 23 deletions src/apex/memory_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,33 @@
#pragma once
#include <apex.hpp>

/* Identifies which kind of memory call a tracked record came from.
 * The enumerators are listed in the same order as the allocator_strings[]
 * table in memory_wrapper.cpp (presumably indexed by these values --
 * confirm before reordering or inserting entries). */
typedef enum apex_allocator {
    APEX_MALLOC            = 0,  /* malloc */
    APEX_CALLOC            = 1,  /* calloc */
    APEX_REALLOC           = 2,  /* realloc */
    APEX_GPU_HOST_MALLOC   = 3,  /* host-side allocation made through a GPU API */
    APEX_GPU_DEVICE_MALLOC = 4,  /* device-side allocation made through a GPU API */
    APEX_FREE              = 5   /* free */
} apex_allocator_t;

namespace apex {

void apex_report_leaks();

typedef enum allocator {
MALLOC = 0,
CALLOC,
REALLOC,
GPU_HOST_MALLOC,
GPU_DEVICE_MALLOC
} allocator_t;
void apex_get_leak_symbols();

class record_t {
public:
size_t bytes;
task_identifier * id;
size_t tid;
allocator_t alloc;
record_t() : bytes(0), id(nullptr), tid(0), alloc(MALLOC), cpu(true) {}
record_t(size_t b, size_t t, allocator_t a, bool on_cpu) :
bytes(b), id(nullptr), tid(t), alloc(a), cpu(on_cpu) {}
apex_allocator_t alloc;
record_t() : bytes(0), id(nullptr), tid(0), alloc(APEX_MALLOC), resolved(false), cpu(true) {}
record_t(size_t b, size_t t, apex_allocator_t a, bool on_cpu) :
bytes(b), id(nullptr), tid(t), alloc(a), resolved(false), cpu(on_cpu) {}
//std::vector<uintptr_t> backtrace;
std::array<void*,32> backtrace;
std::array<void*,64> backtrace;
std::array<std::string,64> symbols;
bool resolved;
size_t size;
bool cpu;
};
Expand All @@ -45,25 +49,19 @@ class book_t {
public:
size_t saved_node_id;
std::atomic<size_t> totalAllocated{0};
std::unordered_map<void*,record_t> memoryMap;
std::unordered_map<const void*,record_t> memoryMap;
std::mutex mapMutex;
~book_t() {
apex_report_leaks();
}
};

class backtrace_record_t {
public:
size_t skip;
std::vector<uintptr_t>& _stack;
backtrace_record_t(size_t s, std::vector<uintptr_t>& _s) : skip(s), _stack(_s) {}
};

book_t& getBook(void);
void controlMemoryWrapper(bool enabled);
void printBacktrace(void);
void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu = true);
void recordFree(void* ptr, bool cpu = true);
void recordAlloc(const size_t bytes, const void* ptr,
const apex_allocator_t alloc, const bool cpu = true);
void recordFree(const void* ptr, const bool cpu = true);
void recordMetric(std::string name, double value);

}; // apex namespace
Expand Down
Loading

0 comments on commit 475515e

Please sign in to comment.