From ad74dc50e1dd04d339d560f960167b1bd57986cf Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Wed, 28 Feb 2024 13:34:31 -0500 Subject: [PATCH 1/4] Updating kokkos to 4.2.01 --- kokkos | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kokkos b/kokkos index 1a3ea28f..e0dc0128 160000 --- a/kokkos +++ b/kokkos @@ -1 +1 @@ -Subproject commit 1a3ea28f6e97b4c9dd2c8ceed53ad58ed5f94dfe +Subproject commit e0dc0128e04f18c2bbbaefceef3616e7ddcfa3c4 From 806c48927b518cd2e365824ea0998bda6a753edd Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Wed, 28 Feb 2024 14:09:19 -0500 Subject: [PATCH 2/4] Re-enabling kokkos allocation tracking When enabled, APEX will keep track of allocations through Kokkos and ensure they are all freed before exit --- src/apex/apex_kokkos.cpp | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/apex/apex_kokkos.cpp b/src/apex/apex_kokkos.cpp index 2a95e3c7..71882b5c 100644 --- a/src/apex/apex_kokkos.cpp +++ b/src/apex/apex_kokkos.cpp @@ -29,13 +29,12 @@ #include "apex.hpp" #include "Kokkos_Profiling_C_Interface.h" -/* static std::mutex memory_mtx; -static std::unordered_map& memory_map() { - static std::unordered_map themap; +static std::unordered_map& memory_map() { + static std::unordered_map themap; return themap; } -*/ + static std::stack& timer_stack() { static APEX_NATIVE_TLS std::stack thestack; return thestack; @@ -147,9 +146,21 @@ void kokkosp_init_library(int loadseq, uint64_t version, * profiling hooks. */ void kokkosp_finalize_library() { + memory_mtx.lock(); + if (memory_map().size() == 0) { + if (apex::apex::instance()->get_node_id() == 0) { + std::cout << "No Kokkos allocation Leaks on rank 0!" << std::endl; + } + } else { + for (auto it : memory_map()) { + std::cerr << "Rank: " << apex::apex::instance()->get_node_id() + << ", Kokkos allocation Leak: " << it.second << std::endl; + } + } + memory_mtx.unlock(); #ifndef APEX_HAVE_HPX if (!apex::apex_options::use_mpi()) { - apex::finalize(); + //apex::finalize(); } #endif } @@ -174,6 +185,7 @@ void kokkosp_request_tool_settings(int num_actions, */ void kokkosp_begin_parallel_for(const char* name, uint32_t devid, uint64_t* kernid) { + apex::in_apex prevent_memory_tracking; std::stringstream ss; ExecutionSpaceIdentifier space_id = identifier_from_devid(devid); ss << "Kokkos::parallel_for [" @@ -195,6 +207,7 @@ void kokkosp_begin_parallel_for(const char* name, void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid, uint64_t* kernid) { + apex::in_apex prevent_memory_tracking; std::stringstream ss; ExecutionSpaceIdentifier space_id = identifier_from_devid(devid); ss << "Kokkos::parallel_reduce [" @@ -216,6 +229,7 @@ void kokkosp_begin_parallel_reduce(const char* name, void kokkosp_begin_parallel_scan(const char* name, uint32_t devid, uint64_t* kernid) { + apex::in_apex prevent_memory_tracking; std::stringstream ss; ExecutionSpaceIdentifier space_id = identifier_from_devid(devid); ss << "Kokkos::parallel_scan [" @@ -263,6 +277,7 @@ void kokkosp_end_parallel_scan(uint64_t kernid) { * user. */ void kokkosp_push_profile_region(const char* name) { + apex::in_apex prevent_memory_tracking; std::stringstream ss; ss << "Kokkos region, " << name; std::string tmp{ss.str()}; @@ -292,18 +307,15 @@ void kokkosp_pop_profile_region() { */ void kokkosp_allocate_data(SpaceHandle_t handle, const char* name, void* ptr, uint64_t size) { + apex::in_apex prevent_memory_tracking; APEX_UNUSED(ptr); std::stringstream ss; ss << "Kokkos " << handle.name << " data, " << name; - /* - std::string tmp{ss.str()}; - auto p = apex::start(tmp); + std::string tmp2{ss.str()}; memory_mtx.lock(); - memory_map().insert(std::pair(ptr, p)); + memory_map().insert(std::pair(ptr, tmp2)); memory_mtx.unlock(); - */ ss << ": Bytes"; - std::string tmp2{ss.str()}; double bytes = (double)(size); if (apex::apex_options::use_kokkos_counters()) { apex::sample_value(tmp2, bytes); @@ -321,13 +333,9 @@ void kokkosp_deallocate_data(SpaceHandle handle, const char* name, APEX_UNUSED(name); APEX_UNUSED(ptr); APEX_UNUSED(size); - /* memory_mtx.lock(); - auto p = memory_map()[ptr]; memory_map().erase(ptr); memory_mtx.unlock(); - apex::stop(p); - */ } /* This function will be called whenever a Kokkos::deep_copy function is @@ -342,6 +350,7 @@ void kokkosp_begin_deep_copy( SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, SpaceHandle src_handle, const char* src_name, const void* src_ptr, uint64_t size) { + apex::in_apex prevent_memory_tracking; std::stringstream ss; ss << "Kokkos deep copy: " << src_handle.name << " " << src_name << " -> " << dst_handle.name << " " << dst_name; From c985b2110006794249c1a8bfd1c81293ab421766 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Wed, 28 Feb 2024 14:10:11 -0500 Subject: [PATCH 3/4] Trying to clean up memory allocation tracking When tracking allocations on the host, everything seems to be working correctly but on occasion, we see allocation amounts changing on the stack in gdb on frontier. can't explain it yet. But some fixes are included in this commit. --- src/apex/apex.cpp | 4 +- src/apex/apex_preload.cpp | 3 +- src/apex/hip_trace.cpp | 4 +- src/apex/memory_wrapper.cpp | 60 +++++++-- src/apex/memory_wrapper.hpp | 44 +++--- src/wrappers/memory_wrapper.cpp | 162 ++++++++--------------- src/wrappers/memory_wrapper.h | 8 +- src/wrappers/memory_wrapper_internal.cpp | 86 +++--------- 8 files changed, 150 insertions(+), 221 deletions(-) diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index b0ab125d..d7a8245c 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -626,6 +626,7 @@ inline std::shared_ptr _new_task( task_identifier * id, const uint64_t task_id, const std::shared_ptr parent_task, apex* instance) { + in_apex prevent_deadlocks; APEX_UNUSED(instance); std::shared_ptr tt_ptr = make_shared(); tt_ptr->task_id = id; @@ -1670,7 +1671,7 @@ void finalize_plugins(void) { std::string dump(bool reset, bool finalizing) { in_apex prevent_deadlocks; - static size_t index{0}; + static int index{0}; // if APEX is disabled, do nothing. if (apex_options::disable() == true || (!finalizing && apex_options::use_final_output_only())) @@ -1698,6 +1699,7 @@ std::string dump(bool reset, bool finalizing) { controlMemoryWrapper(true); } if (_notify_listeners) { + //apex_get_leak_symbols(); dump_event_data data(instance->get_node_id(), thread_instance::get_id(), reset); for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { diff --git a/src/apex/apex_preload.cpp b/src/apex/apex_preload.cpp index 930db837..48f2b979 100644 --- a/src/apex/apex_preload.cpp +++ b/src/apex/apex_preload.cpp @@ -75,7 +75,8 @@ int apex_preload_main(int argc, char** argv, char** envp) { ret = main_real(argc, argv, envp); } else { apex::init("APEX Preload", 0, 1); - auto t = apex::new_task(__APEX_FUNCTION__); + const std::string timerName{__APEX_FUNCTION__}; + auto t = apex::new_task(timerName); apex::start(t); ret = main_real(argc, argv, envp); apex::stop(t); diff --git a/src/apex/hip_trace.cpp b/src/apex/hip_trace.cpp index 209a1af7..635e9bc3 100644 --- a/src/apex/hip_trace.cpp +++ b/src/apex/hip_trace.cpp @@ -573,7 +573,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data, hostTotalAllocated.fetch_add(bytes, std::memory_order_relaxed); value = (double)(hostTotalAllocated); store_sync_counter_data(nullptr, "Total Bytes Occupied on Host", value, false); - apex::recordAlloc(bytes, ptr, apex::GPU_HOST_MALLOC); + apex::recordAlloc(bytes, ptr, APEX_GPU_HOST_MALLOC); return true; } else { if (managed) { @@ -587,7 +587,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data, totalAllocated.fetch_add(bytes, std::memory_order_relaxed); value = (double)(totalAllocated); store_sync_counter_data(nullptr, "Total Bytes Occupied on Device", value, false); - apex::recordAlloc(bytes, ptr, apex::GPU_DEVICE_MALLOC, false); + apex::recordAlloc(bytes, ptr, APEX_GPU_DEVICE_MALLOC, false); } // how much memory does SMI think we have? apex::rsmi::monitor::instance().explicitMemCheck(); diff --git a/src/apex/memory_wrapper.cpp b/src/apex/memory_wrapper.cpp index 73d5e406..efec7e28 100644 --- a/src/apex/memory_wrapper.cpp +++ b/src/apex/memory_wrapper.cpp @@ -22,7 +22,7 @@ namespace apex { static const char * allocator_strings[] = { - "malloc", "calloc", "realloc", "gpu_host_malloc", "gpu_device_malloc" + "malloc", "calloc", "realloc", "gpu_host_malloc", "gpu_device_malloc", "free" }; book_t& getBook() { @@ -98,10 +98,10 @@ void disable_memory_wrapper() { } void printBacktrace() { - void *trace[32]; + void *trace[64]; size_t size, i; char **strings; - size = backtrace( trace, 32 ); + size = backtrace( trace, 64 ); strings = backtrace_symbols( trace, size ); std::cerr << std::endl; // skip the first frame, it is this handler @@ -110,7 +110,8 @@ void printBacktrace() { } } -void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) { +void recordAlloc(const size_t bytes, const void* ptr, + const apex_allocator_t alloc, const bool cpu) { if (!recording()) return; static book_t& book = getBook(); double value = (double)(bytes); @@ -123,7 +124,7 @@ void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) { tmp.size = backtrace(tmp.backtrace.data(), tmp.backtrace.size()); book.mapMutex.lock(); //book.memoryMap[ptr] = value; - book.memoryMap.insert(std::pair(ptr, tmp)); + book.memoryMap.insert(std::pair(ptr, tmp)); book.mapMutex.unlock(); book.totalAllocated.fetch_add(bytes, std::memory_order_relaxed); if (p == nullptr) { @@ -140,7 +141,7 @@ void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) { if (cpu) sample_value("Memory: Total Bytes Occupied", value); } -void recordFree(void* ptr, bool cpu) { +void recordFree(const void* ptr, const bool cpu) { if (!recording()) return; static book_t& book = getBook(); size_t bytes; @@ -184,8 +185,8 @@ void recordMetric(std::string name, double value) { } // Comparator function to sort pairs descending, according to second value -bool cmp(std::pair& a, - std::pair& b) +bool cmp(std::pair& a, + std::pair& b) { return a.second.bytes > b.second.bytes; } @@ -197,6 +198,22 @@ bool cmp2(std::pair& a, return a.second > b.second; } +void apex_get_leak_symbols() { + in_apex prevent_memory_tracking; + if (!apex_options::track_cpu_memory()) { return; } + if (!recording()) return; + static book_t& book = getBook(); + for (auto& it : book.memoryMap) { + for(size_t i = 0; i < it.second.size; i++ ){ + std::string* tmp2{lookup_address(((uintptr_t)it.second.backtrace[i]), true)}; + it.second.symbols[i] = *tmp2; + //delete tmp2; + } + it.second.resolved = true; + } + +} + void apex_report_leaks() { if (!apex_options::track_gpu_memory() && !apex_options::track_cpu_memory()) { return; @@ -211,7 +228,7 @@ void apex_report_leaks() { std::string outfile{ss.str()}; std::ofstream report (outfile); // Declare vector of pairs - std::vector > sorted; + std::vector > sorted; if (book.saved_node_id == 0) { std::cout << "APEX Memory Report: (see " << outfile << ")" << std::endl; @@ -238,6 +255,7 @@ void apex_report_leaks() { } size_t actual_leaks{0}; // Print the sorted value + size_t actual_bytes{0}; for (auto& it : sorted) { std::stringstream ss; //if (it.second.bytes > 1000) { @@ -266,13 +284,27 @@ void apex_report_leaks() { if (tmp.find("pthread_once", 0) != std::string::npos) { skip = true; break; } if (tmp.find("atexit", 0) != std::string::npos) { skip = true; break; } if (tmp.find("apex_pthread_function", 0) != std::string::npos) { skip = true; break; } + if (tmp.find("hipFuncGetAttributes", 0) != std::string::npos) { skip = true; break; } if (nameless) { if (tmp.find("libcuda", 0) != std::string::npos) { skip = true; break; } if (tmp.find("GOMP_parallel", 0) != std::string::npos) { skip = true; break; } } } - std::string* tmp2{lookup_address(((uintptr_t)it.second.backtrace[i]), true)}; - ss << "\t" << *tmp2 << std::endl; + const std::string unknown{"{(unknown)}"}; + if (it.second.resolved) { + if (it.second.symbols[i].find(unknown) == std::string::npos) { + ss << "\t" << it.second.symbols[i] << std::endl; + } else { + ss << "\t" << tmp << std::endl; + } + } else { + std::string* tmp2{lookup_address(((uintptr_t)it.second.backtrace[i]), true)}; + if (tmp2->find(unknown) == std::string::npos) { + ss << "\t" << *tmp2 << std::endl; + } else { + ss << "\t" << tmp << std::endl; + } + } } if (skip) { continue; } @@ -295,10 +327,14 @@ void apex_report_leaks() { */ report << ss.str(); actual_leaks++; + actual_bytes+=it.second.bytes; } report.close(); if (book.saved_node_id == 0) { - std::cout << "Reported " << actual_leaks << " 'actual' leaks.\nExpect false positives if memory was freed after exit." << std::endl; + std::cout << "Reported " << actual_leaks << " 'actual' leaks of " + << actual_bytes + << " bytes.\nExpect false positives if memory was freed after exit." + << std::endl; } if (actual_leaks == 0) { remove(outfile.c_str()); diff --git a/src/apex/memory_wrapper.hpp b/src/apex/memory_wrapper.hpp index 6072ee8e..28b44852 100644 --- a/src/apex/memory_wrapper.hpp +++ b/src/apex/memory_wrapper.hpp @@ -14,29 +14,33 @@ #pragma once #include +typedef enum apex_allocator { + APEX_MALLOC = 0, + APEX_CALLOC, + APEX_REALLOC, + APEX_GPU_HOST_MALLOC, + APEX_GPU_DEVICE_MALLOC, + APEX_FREE +} apex_allocator_t; + namespace apex { void apex_report_leaks(); - -typedef enum allocator { - MALLOC = 0, - CALLOC, - REALLOC, - GPU_HOST_MALLOC, - GPU_DEVICE_MALLOC -} allocator_t; +void apex_get_leak_symbols(); class record_t { public: size_t bytes; task_identifier * id; size_t tid; - allocator_t alloc; - record_t() : bytes(0), id(nullptr), tid(0), alloc(MALLOC), cpu(true) {} - record_t(size_t b, size_t t, allocator_t a, bool on_cpu) : - bytes(b), id(nullptr), tid(t), alloc(a), cpu(on_cpu) {} + apex_allocator_t alloc; + record_t() : bytes(0), id(nullptr), tid(0), alloc(APEX_MALLOC), resolved(false), cpu(true) {} + record_t(size_t b, size_t t, apex_allocator_t a, bool on_cpu) : + bytes(b), id(nullptr), tid(t), alloc(a), resolved(false), cpu(on_cpu) {} //std::vector backtrace; - std::array backtrace; + std::array backtrace; + std::array symbols; + bool resolved; size_t size; bool cpu; }; @@ -45,25 +49,19 @@ class book_t { public: size_t saved_node_id; std::atomic totalAllocated{0}; - std::unordered_map memoryMap; + std::unordered_map memoryMap; std::mutex mapMutex; ~book_t() { apex_report_leaks(); } }; -class backtrace_record_t { -public: - size_t skip; - std::vector& _stack; - backtrace_record_t(size_t s, std::vector& _s) : skip(s), _stack(_s) {} -}; - book_t& getBook(void); void controlMemoryWrapper(bool enabled); void printBacktrace(void); -void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu = true); -void recordFree(void* ptr, bool cpu = true); +void recordAlloc(const size_t bytes, const void* ptr, + const apex_allocator_t alloc, const bool cpu = true); +void recordFree(const void* ptr, const bool cpu = true); void recordMetric(std::string name, double value); }; // apex namespace diff --git a/src/wrappers/memory_wrapper.cpp b/src/wrappers/memory_wrapper.cpp index 37b950cc..0f7bf73f 100644 --- a/src/wrappers/memory_wrapper.cpp +++ b/src/wrappers/memory_wrapper.cpp @@ -29,34 +29,34 @@ * has finished initialization and is about to launch main. * So we use 2 flags to accomlpish this. */ -bool& apex_ready() { +bool& apex_memory_ready() { static bool _ready = false; return _ready; } -bool& dl_ready() { +bool& apex_dl_ready() { static bool _ready = true; return _ready; } -bool& enabled() { +bool& apex_memory_enabled() { static bool _enabled = true; return _enabled; } -bool all_clear() { - return apex_ready() && dl_ready() && enabled(); +bool apex_memory_all_clear() { + return apex_memory_ready() && apex_dl_ready() && apex_memory_enabled(); } extern "C" void apex_memory_initialized() { apex_memory_wrapper_init(); - apex_ready() = true; + apex_memory_ready() = true; } extern "C" void apex_memory_lights_out() { - apex_ready() = false; + apex_memory_ready() = false; apex::apex_report_leaks(); } @@ -67,28 +67,30 @@ void apex_memory_finalized() { extern "C" void apex_memory_dl_initialized() { - dl_ready() = true; + apex_dl_ready() = true; } /* During startup, we need to do some memory management in case * malloc/free is called during the startup process. */ // Memory for bootstrapping. must not be static! -char bootstrap_heap[BOOTSTRAP_HEAP_SIZE]; -char * bootstrap_base = bootstrap_heap; +char apex_memory_bootstrap_heap[BOOTSTRAP_HEAP_SIZE]; +char * apex_memory_bootstrap_base = apex_memory_bootstrap_heap; -uintptr_t reportHeapLocation() { - printf("Bootstrap heap located at: %p\n", (void*)(&bootstrap_heap[0])); - return (uintptr_t)&bootstrap_heap[0]; +/* +uintptr_t apex_memory_reportHeapLocation() { + printf("Bootstrap heap located at: %p\n", (void*)(&apex_memory_bootstrap_heap[0])); + return (uintptr_t)&apex_memory_bootstrap_heap[0]; } +*/ -static inline int is_bootstrap(void * ptr) { +static inline int apex_memory_is_bootstrap(void * ptr) { char const * const p = (char*)ptr; - return (p < bootstrap_heap + BOOTSTRAP_HEAP_SIZE) && (bootstrap_heap < p); + return (p < apex_memory_bootstrap_heap + BOOTSTRAP_HEAP_SIZE) && (apex_memory_bootstrap_heap < p); } -static void * bootstrap_alloc(size_t align, size_t size) { - //static uintptr_t dummy = reportHeapLocation(); +static void * apex_memory_bootstrap_alloc(size_t align, size_t size) { + //static uintptr_t dummy = apex_memory_reportHeapLocation(); //APEX_UNUSED(dummy); char * ptr; @@ -106,11 +108,11 @@ static void * bootstrap_alloc(size_t align, size_t size) { } // Calculate address - ptr = (char*)(((size_t)bootstrap_base + (align-1)) & ~(align-1)); - bootstrap_base = ptr + size; + ptr = (char*)(((size_t)apex_memory_bootstrap_base + (align-1)) & ~(align-1)); + apex_memory_bootstrap_base = ptr + size; // Check for overflow - if (bootstrap_base >= (bootstrap_heap + BOOTSTRAP_HEAP_SIZE)) { + if (apex_memory_bootstrap_base >= (apex_memory_bootstrap_heap + BOOTSTRAP_HEAP_SIZE)) { // These calls are unsafe, but we're about to die anyway. printf("APEX bootstreap heap exceeded. Increase BOOTSTRAP_HEAP_SIZE in " __FILE__ " and try again.\n"); fflush(stdout); @@ -120,7 +122,7 @@ static void * bootstrap_alloc(size_t align, size_t size) { return (void*)ptr; } -static inline void bootstrap_free(void * ptr) { +static inline void apex_memory_bootstrap_free(void * ptr) { // Do nothing: bootstrap memory is deallocated on program exit APEX_UNUSED(ptr); } @@ -142,7 +144,7 @@ static inline void bootstrap_free(void * ptr) { } template T -get_system_function_handle(char const * name, T caller) +apex_get_system_function_handle(char const * name, T caller) { T handle; @@ -188,17 +190,17 @@ void* malloc (size_t size) __THROW { if (!bootstrapped) { if (!initializing) { initializing = true; - _malloc = get_system_function_handle("malloc", &malloc); + _malloc = apex_get_system_function_handle("malloc", &malloc); } if (!_malloc) { - return bootstrap_alloc(0, size); + return apex_memory_bootstrap_alloc(0, size); } - if (!all_clear()) { + if (!apex_memory_all_clear()) { return _malloc(size); } bootstrapped = true; } - if (all_clear()) { + if (apex_memory_all_clear()) { return apex_malloc_wrapper(_malloc, size); } return _malloc(size); @@ -209,25 +211,25 @@ void free (void* ptr) __THROW { static free_p _free = NULL; static bool initializing = false; static bool bootstrapped = false; - if (is_bootstrap(ptr)) { + if (apex_memory_is_bootstrap(ptr)) { // do nothing, effectively - return bootstrap_free(ptr); + return apex_memory_bootstrap_free(ptr); } if (!bootstrapped) { if (!initializing) { initializing = true; - _free = get_system_function_handle("free", &free); + _free = apex_get_system_function_handle("free", &free); } if (!_free) { // do nothing, effectively - return bootstrap_free(ptr); + return apex_memory_bootstrap_free(ptr); } - if (!all_clear()) { + if (!apex_memory_all_clear()) { return _free(ptr); } bootstrapped = true; } - if (all_clear()) { + if (apex_memory_all_clear()) { return apex_free_wrapper(_free, ptr); } return _free(ptr); @@ -241,7 +243,7 @@ int puts (const char* s) { if (!bootstrapped) { if (!initializing) { initializing = true; - _puts = get_system_function_handle("puts", &puts); + _puts = apex_get_system_function_handle("puts", &puts); } if (!_puts) { // do nothing, effectively @@ -249,9 +251,9 @@ int puts (const char* s) { } bootstrapped = true; } - enabled() = false; - auto r = _puts(s); - enabled() = true; + apex_memory_enabled() = false; + int r = _puts(s); + apex_memory_enabled() = true; return r; } @@ -263,17 +265,17 @@ void* calloc (size_t nmemb, size_t size) __THROW { if (!bootstrapped) { if (!initializing) { initializing = true; - _calloc = get_system_function_handle("calloc", &calloc); + _calloc = apex_get_system_function_handle("calloc", &calloc); } if (!_calloc) { - return bootstrap_alloc(0, (nmemb*size)); + return apex_memory_bootstrap_alloc(0, (nmemb*size)); } - if (!all_clear()) { + if (!apex_memory_all_clear()) { return _calloc(nmemb, size); } bootstrapped = true; } - if (all_clear()) { + if (apex_memory_all_clear()) { return apex_calloc_wrapper(_calloc, nmemb, size); } return _calloc(nmemb, size); @@ -287,17 +289,17 @@ void* realloc (void* ptr, size_t size) __THROW { if (!bootstrapped) { if (!initializing) { initializing = true; - _realloc = get_system_function_handle("realloc", &realloc); + _realloc = apex_get_system_function_handle("realloc", &realloc); } if (!_realloc) { - return bootstrap_alloc(0, size); + return apex_memory_bootstrap_alloc(0, size); } - if (!all_clear()) { + if (!apex_memory_all_clear()) { return _realloc(ptr, size); } bootstrapped = true; } - if (all_clear()) { + if (apex_memory_all_clear()) { return apex_realloc_wrapper(_realloc, ptr, size); } return _realloc(ptr, size); @@ -308,7 +310,7 @@ void* realloc (void* ptr, size_t size) __THROW { void* memalign (size_t alignment, size_t size) { static memalign_p _memalign = NULL; if (!_memalign) { - _memalign = get_system_function_handle("memalign", &memalign); + _memalign = apex_get_system_function_handle("memalign", &memalign); } return apex_memalign_wrapper(_memalign, alignment, size); } @@ -318,7 +320,7 @@ void* memalign (size_t alignment, size_t size) { void* reallocarray (void* ptr, size_t nmemb, size_t size) { static reallocarray_p _reallocarray = NULL; if (!_reallocarray) { - _reallocarray = get_system_function_handle("reallocarray", &reallocarray); + _reallocarray = apex_get_system_function_handle("reallocarray", &reallocarray); } return apex_reallocarray_wrapper(_reallocarray, ptr, nmemb, size); } @@ -328,7 +330,7 @@ void* reallocarray (void* ptr, size_t nmemb, size_t size) { void* reallocf (void* ptr, size_t size) { static reallocf_p _reallocf = NULL; if (!_reallocf) { - _reallocf = get_system_function_handle("reallocf", &reallocf); + _reallocf = apex_get_system_function_handle("reallocf", &reallocf); } return apex_reallocf_wrapper(_reallocf, ptr, size); } @@ -338,7 +340,7 @@ void* reallocf (void* ptr, size_t size) { void* valloc (size_t size) { static valloc_p _valloc = NULL; if (!_valloc) { - _valloc = get_system_function_handle("valloc", &valloc); + _valloc = apex_get_system_function_handle("valloc", &valloc); } return apex_valloc_wrapper(_valloc, size); } @@ -348,71 +350,11 @@ void* valloc (size_t size) { size_t malloc_usable_size (void* ptr) { static malloc_usable_size_p _malloc_usable_size = NULL; if (!_malloc_usable_size) { - _malloc_usable_size = get_system_function_handle("malloc_usable_size", &malloc_usable_size); + _malloc_usable_size = apex_get_system_function_handle("malloc_usable_size", &malloc_usable_size); } return apex_malloc_usable_size_wrapper(_malloc_usable_size, ptr); } #endif -#endif - -#else // Wrap via the the link line. - -void* __real_malloc(size_t); -void* __wrap_malloc(size_t size) { - return apex_malloc_wrapper(__real_malloc, size); -} - -void __real_free(void*); -void __wrap_free(void* ptr) { - return apex_free_wrapper(__real_free, ptr); -} - -void* __real_calloc(size_t, size_t); -void* __wrap_calloc(size_t nmemb, size_t size) { - return apex_calloc_wrapper(__real_calloc, nmemb, size); -} - -void* __real_realloc(void*, size_t); -void* __wrap_realloc(void* ptr, size_t size) { - return apex_realloc_wrapper(__real_realloc, ptr, size); -} - -#if 0 -#if defined(memalign) -void* __real_memalign(size_t, size_t); -void* __wrap_memalign(size_t alignment, size_t size) { - return apex_memalign_wrapper(__real_memalign, alignment, size); -} -#endif - -#if defined(reallocarray) -void* __real_reallocarray(void*, size_t, size_t); -void* __wrap_reallocarray(void* ptr, size_t nmemb, size_t size) { - return apex_reallocarray_wrapper(__real_reallocarray, ptr, nmemb, size); -} -#endif - -#if defined(reallocf) -void* __real_reallocf(void*, size_t); -void* __wrap_reallocf(void* ptr, size_t size) { - return apex_reallocf_wrapper(__real_reallocf, ptr, size); -} -#endif - -#if defined(valloc) -void* __real_valloc(size_t); -void* __wrap_valloc(size_t size) { - return apex_valloc_wrapper(__vallocllocf, size); -} -#endif - -#if defined(malloc_usable_size) -size_t __real_malloc_usable_size(void*); -size_t __wrap_malloc_usable_size(void* ptr) { - return apex_malloc_usable_size_wrapper(__malloc_usable_size, ptr); -} -#endif -#endif - +#endif // if 0 #endif //APEX_PRELOAD_LIB diff --git a/src/wrappers/memory_wrapper.h b/src/wrappers/memory_wrapper.h index a6cc4831..4c26fd51 100644 --- a/src/wrappers/memory_wrapper.h +++ b/src/wrappers/memory_wrapper.h @@ -59,11 +59,11 @@ typedef size_t (*valloc_p)(void*); extern "C" { #endif -void* apex_malloc_wrapper(malloc_p malloc_call, size_t size); -void apex_free_wrapper(free_p free_call, void* ptr); +void* apex_malloc_wrapper(const malloc_p malloc_call, const size_t size); +void apex_free_wrapper(const free_p free_call, const void* ptr); int apex_puts_wrapper(const char* s); -void* apex_calloc_wrapper(calloc_p calloc_call, size_t nmemb, size_t size); -void* apex_realloc_wrapper(realloc_p realloc_call, void* ptr, size_t size); +void* apex_calloc_wrapper(const calloc_p calloc_call, const size_t nmemb, const size_t size); +void* apex_realloc_wrapper(const realloc_p realloc_call, const void* ptr, const size_t size); void apex_memory_wrapper_init(void); void apex_memory_lights_out(void); #if 0 diff --git a/src/wrappers/memory_wrapper_internal.cpp b/src/wrappers/memory_wrapper_internal.cpp index 65c040b8..8085cd03 100644 --- a/src/wrappers/memory_wrapper_internal.cpp +++ b/src/wrappers/memory_wrapper_internal.cpp @@ -43,12 +43,12 @@ void apex_memory_wrapper_init() { APEX_UNUSED(book); } -bool& inWrapper() { +static bool& inWrapper() { thread_local static bool _inWrapper = false; return _inWrapper; } -void* apex_malloc_wrapper(malloc_p malloc_call, size_t size) { +void* apex_malloc_wrapper(const malloc_p malloc_call, const size_t size) { if(inWrapper() || apex::in_apex::get() > 0) { // Another wrapper has already intercepted the call so just pass through return malloc_call(size); @@ -57,26 +57,26 @@ void* apex_malloc_wrapper(malloc_p malloc_call, size_t size) { // do the allocation auto retval = malloc_call(size); // record the state - apex::recordAlloc(size, retval, apex::MALLOC); + apex::recordAlloc(size, retval, APEX_MALLOC); inWrapper() = false; return retval; } -void apex_free_wrapper(free_p free_call, void* ptr) { +void apex_free_wrapper(const free_p free_call, const void* ptr) { if(inWrapper() || apex::in_apex::get() > 0) { // Another wrapper has already intercepted the call so just pass through - return free_call(ptr); + return free_call((void*)ptr); } inWrapper() = true; // record the state if (ptr != nullptr) { apex::recordFree(ptr); } // do the allocation - free_call(ptr); + free_call((void*)ptr); inWrapper() = false; return; } -void* apex_calloc_wrapper(calloc_p calloc_call, size_t nmemb, size_t size) { +void* apex_calloc_wrapper(const calloc_p calloc_call, const size_t nmemb, const size_t size) { if(inWrapper() || apex::in_apex::get() > 0) { // Another wrapper has already intercepted the call so just pass through return calloc_call(nmemb, size); @@ -85,23 +85,23 @@ void* apex_calloc_wrapper(calloc_p calloc_call, size_t nmemb, size_t size) { // do the allocation auto retval = calloc_call(nmemb, size); // record the state - apex::recordAlloc(size, retval, apex::CALLOC); + apex::recordAlloc(size, retval, APEX_CALLOC); inWrapper() = false; return retval; } -void* apex_realloc_wrapper(realloc_p realloc_call, void* ptr, size_t size) { +void* apex_realloc_wrapper(const realloc_p realloc_call, const void* ptr, const size_t size) { if(inWrapper() || apex::in_apex::get() > 0) { // Another wrapper has already intercepted the call so just pass through - return realloc_call(ptr, size); + return realloc_call((void*)ptr, size); } inWrapper() = true; // record the state if (ptr != nullptr) { apex::recordFree(ptr); } // do the allocation - auto retval = realloc_call(ptr, size); + auto retval = realloc_call((void*)ptr, size); // record the state - apex::recordAlloc(size, retval, apex::REALLOC); + apex::recordAlloc(size, retval, APEX_REALLOC); inWrapper() = false; return retval; } @@ -117,7 +117,7 @@ void* apex_memalign_wrapper(memalign_p memalign_call, size_t nmemb, size_t size) inWrapper() = true; // do the allocation - auto retval = memalign_call(nmemb, size); + void* retval = memalign_call(nmemb, size); inWrapper() = false; return retval; @@ -135,7 +135,7 @@ void* apex_reallocarray_wrapper(reallocarray_p reallocarray_call, void* ptr, siz inWrapper() = true; // do the allocation - auto retval = reallocarray_call(ptr, nmemb, size); + void* retval = reallocarray_call(ptr, nmemb, size); inWrapper() = false; return retval; @@ -153,7 +153,7 @@ void* apex_reallocf_wrapper(reallocf_p reallocf_call, void* ptr, size_t size) { inWrapper() = true; // do the allocation - auto retval = reallocf_call(ptr, size); + void* retval = reallocf_call(ptr, size); inWrapper() = false; return retval; @@ -171,7 +171,7 @@ void* apex_valloc_wrapper(valloc_p valloc_call, size_t size) { inWrapper() = true; // do the allocation - auto retval = valloc_call(size); + void* retval = valloc_call(size); inWrapper() = false; return retval; @@ -189,7 +189,7 @@ size_t apex_malloc_usable_size_wrapper(malloc_usable_size_p malloc_usable_size_c inWrapper() = true; // do the allocation - auto retval = malloc_usable_size_call(ptr); + void* retval = malloc_usable_size_call(ptr); inWrapper() = false; return retval; @@ -197,55 +197,5 @@ size_t apex_malloc_usable_size_wrapper(malloc_usable_size_p malloc_usable_size_c } #endif -#endif - -extern "C" void* apex_malloc(size_t size) { - return apex_malloc_wrapper(malloc, size); -} - -extern "C" void apex_free(void* ptr) { - return apex_free_wrapper(free, ptr); -} - -extern "C" void* apex_calloc(size_t nmemb, size_t size) { - return apex_calloc_wrapper(calloc, nmemb, size); -} - -extern "C" void* apex_realloc(void* ptr, size_t size) { - return apex_realloc_wrapper(realloc, ptr, size); -} - -#if 0 -#if defined(memalign) -extern "C" void* apex_memalign(size_t nmemb, size_t size) { - return apex_memalign_wrapper(memalign, nmemb, size); -} -#endif - -#if defined(reallocarray) -extern "C" void* apex_reallocarray(void* ptr, size_t nmemb, size_t size) { - return apex_reallocarray_wrapper(reallocarray, ptr, nmemb, size); -} -#endif - -#if defined(reallocf) -extern "C" void* apex_reallocf(void* ptr, size_t size) { - return apex_reallocf_wrapper(reallocf, ptr, size); -} -#endif - -#if defined(valloc) -extern "C" void* apex_valloc(size_t size) { - return apex_valloc_wrapper(valloc, size); -} -#endif - -#if defined(malloc_usable_size) -extern "C" void* apex_malloc_usable_size(void* ptr) { - return apex_malloc_usable_size_wrapper(malloc_usable_size, ptr); -} -#endif - -#endif - +#endif // if 0 From d656d07372bdb9a0cf50502a81d1fe51906efc01 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Wed, 28 Feb 2024 11:58:40 -0800 Subject: [PATCH 4/4] Updating roofline stats to use new CSV output --- src/scripts/roofline_stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/roofline_stats.py b/src/scripts/roofline_stats.py index 49d6ee4b..2933387c 100755 --- a/src/scripts/roofline_stats.py +++ b/src/scripts/roofline_stats.py @@ -2,9 +2,9 @@ import pandas as pd import glob -print('Reading METRIC_GROUP_0/apex.0.csv') -df = pd.read_csv('METRIC_GROUP_0/apex.0.csv') -for file_name in glob.glob('METRIC_GROUP_[1-9]*/apex.0.csv'): +print('Reading METRIC_GROUP_0/apex_profiles.csv') +df = pd.read_csv('METRIC_GROUP_0/apex_profiles.csv') +for file_name in glob.glob('METRIC_GROUP_[1-9]*/apex_profiles.csv'): print("Reading", file_name) x = pd.read_csv(file_name) for column in x: