Skip to content

Commit a870bcc

Browse files
Adding threshold for logging memory pool allocations
Summary: We have a leak check in memory pool the prints all current allocations along with call sites. In this diff I'm expanding that functionality and adding possibility to log that information once we reach configurable threshold. It's supposed to be used for debugging, when we want to know how exactly the memory has been used in particular memory pool. Adding this only for leaf nodes for now. Differential Revision: D79902261
1 parent 845ff45 commit a870bcc

File tree

3 files changed

+84
-14
lines changed

3 files changed

+84
-14
lines changed

velox/common/memory/MemoryPool.cpp

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "velox/common/memory/MemoryPool.h"
1818

1919
#include <signal.h>
20-
#include <set>
2120

2221
#include "velox/common/base/Counters.h"
2322
#include "velox/common/base/StatsReporter.h"
@@ -1203,10 +1202,37 @@ void MemoryPoolImpl::recordAllocDbg(const void* addr, uint64_t size) {
12031202
if (!needRecordDbg(true)) {
12041203
return;
12051204
}
1205+
AllocationRecord allocationRecord{size, process::StackTrace()};
12061206
std::lock_guard<std::mutex> l(debugAllocMutex_);
1207-
debugAllocRecords_.emplace(
1208-
reinterpret_cast<uint64_t>(addr),
1209-
AllocationRecord{size, process::StackTrace()});
1207+
auto [it, inserted] = debugAllocRecords_.try_emplace(
1208+
reinterpret_cast<uint64_t>(addr), std::move(allocationRecord));
1209+
VELOX_CHECK(
1210+
inserted,
1211+
fmt::format(
1212+
"[MemoryPool] Duplicate allocation record for {} bytes.\n"
1213+
"======== Allocation Stack ========\n"
1214+
"{}\n",
1215+
size,
1216+
allocationRecord.callStack.toString()));
1217+
debugAllocRecordsTotalSize_ += size;
1218+
if (debugOptions_->debugPoolWarnThresholdBytes > 0 &&
1219+
debugAllocRecordsTotalSize_ >
1220+
debugOptions_->debugPoolWarnThresholdBytes &&
1221+
!debugWarnThresholdExceeded_) {
1222+
debugWarnThresholdExceeded_ = true;
1223+
VELOX_MEM_LOG(WARNING) << fmt::format(
1224+
"[MemoryPool] Memory pool '{}' exceeded warning threshold of {} bytes with allocation of {} bytes, resulting in total size of {} bytes.\n"
1225+
"======== Allocation Stack ========\n"
1226+
"{}\n"
1227+
"======= Current Allocations ======\n"
1228+
"{}",
1229+
name_,
1230+
debugOptions_->debugPoolWarnThresholdBytes,
1231+
size,
1232+
debugAllocRecordsTotalSize_,
1233+
it->second.callStack.toString(),
1234+
dumpRecordsDbg());
1235+
}
12101236
}
12111237

12121238
void MemoryPoolImpl::recordAllocDbg(const Allocation& allocation) {
@@ -1251,6 +1277,7 @@ void MemoryPoolImpl::recordFreeDbg(const void* addr, uint64_t size) {
12511277
freeStackTrace));
12521278
}
12531279
debugAllocRecords_.erase(addrUint64);
1280+
debugAllocRecordsTotalSize_ -= size;
12541281
}
12551282

12561283
void MemoryPoolImpl::recordFreeDbg(const Allocation& allocation) {
@@ -1280,6 +1307,8 @@ void MemoryPoolImpl::recordGrowDbg(const void* addr, uint64_t newSize) {
12801307
if (allocResult == debugAllocRecords_.end()) {
12811308
VELOX_FAIL("Growing of un-allocated memory. Free address {}.", addrUint64);
12821309
}
1310+
debugAllocRecordsTotalSize_ =
1311+
debugAllocRecordsTotalSize_ - allocResult->second.size + newSize;
12831312
allocResult->second.size = newSize;
12841313
}
12851314

@@ -1288,10 +1317,16 @@ void MemoryPoolImpl::leakCheckDbg() {
12881317
if (debugAllocRecords_.empty()) {
12891318
return;
12901319
}
1291-
std::stringbuf buf;
1292-
std::ostream oss(&buf);
1293-
oss << "[MemoryPool] : " << name_ << " - Detected total of "
1294-
<< debugAllocRecords_.size() << " leaked allocations:\n";
1320+
VELOX_FAIL(fmt::format(
1321+
"[MemoryPool] Leak check failed for '{}' pool - {}",
1322+
name_,
1323+
dumpRecordsDbg()));
1324+
}
1325+
1326+
std::string MemoryPoolImpl::dumpRecordsDbg() {
1327+
VELOX_CHECK(debugEnabled());
1328+
std::stringstream oss;
1329+
oss << fmt::format("Found {} allocations:\n", debugAllocRecords_.size());
12951330
struct AllocationStats {
12961331
uint64_t size{0};
12971332
uint64_t numAllocations{0};
@@ -1316,12 +1351,13 @@ void MemoryPoolImpl::leakCheckDbg() {
13161351
return a.second.size > b.second.size;
13171352
});
13181353
for (const auto& pair : sortedRecords) {
1319-
oss << "======== Leaked memory from " << pair.second.numAllocations
1320-
<< " total allocations of " << succinctBytes(pair.second.size)
1321-
<< " total size ========\n"
1322-
<< pair.first << "\n";
1354+
oss << fmt::format(
1355+
"======== {} allocations of {} total size ========\n{}\n",
1356+
pair.second.numAllocations,
1357+
succinctBytes(pair.second.size),
1358+
pair.first);
13231359
}
1324-
VELOX_FAIL(buf.str());
1360+
return oss.str();
13251361
}
13261362

13271363
void MemoryPoolImpl::handleAllocationFailure(

velox/common/memory/MemoryPool.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ class MemoryPool : public std::enable_shared_from_this<MemoryPool> {
112112
/// memory pools whose name matches the specified regular expression. Empty
113113
/// string means no match for all.
114114
std::string debugPoolNameRegex;
115+
116+
/// Warning threshold in bytes for debug memory pools. When set to a
117+
/// non-zero value, a warning will be logged once per memory pool when
118+
/// allocations cause the pool to exceed this threshold. A value of
119+
/// 0 means no warning threshold is enforced.
120+
uint64_t debugPoolWarnThresholdBytes{0};
115121
};
116122

117123
struct Options {
@@ -963,7 +969,11 @@ class MemoryPoolImpl : public MemoryPool {
963969
bool needRecordDbg(bool isAlloc);
964970

965971
// Invoked to record the call stack of a buffer allocation if debug mode of
966-
// this memory pool is enabled.
972+
// this memory pool is enabled. If debugPoolWarnThresholdBytes is configured
973+
// and the total debug allocation size exceeds this threshold, a warning is
974+
// logged once per memory pool containing the allocation stack trace and
975+
// current pool allocations summary for debugging purposes. Same for other
976+
// 'recordAllocDbg(..)' calls below.
967977
void recordAllocDbg(const void* addr, uint64_t size);
968978

969979
// Invoked to record the call stack of a non-contiguous allocation if debug
@@ -997,6 +1007,10 @@ class MemoryPoolImpl : public MemoryPool {
9971007
// pool is enabled.
9981008
void leakCheckDbg();
9991009

1010+
// Dump the recorded call sites of the memory allocations in
1011+
// 'debugAllocRecords_' to the string.
1012+
std::string dumpRecordsDbg();
1013+
10001014
void handleAllocationFailure(const std::string& failureMessage);
10011015

10021016
MemoryManager* const manager_;
@@ -1064,6 +1078,12 @@ class MemoryPoolImpl : public MemoryPool {
10641078

10651079
// Map from address to 'AllocationRecord'.
10661080
std::unordered_map<uint64_t, AllocationRecord> debugAllocRecords_;
1081+
1082+
// Total size of all the memory allocations recorded in 'debugAllocRecords_'.
1083+
uint64_t debugAllocRecordsTotalSize_{0};
1084+
1085+
// Flag to track if warning threshold has been exceeded once for this pool.
1086+
bool debugWarnThresholdExceeded_{false};
10671087
};
10681088

10691089
/// An Allocator backed by a memory pool for STL containers.

velox/core/QueryConfig.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,14 @@ class QueryConfig {
527527
static constexpr const char* kDebugMemoryPoolNameRegex =
528528
"debug_memory_pool_name_regex";
529529

530+
/// Warning threshold in bytes for debug memory pools. When set to a
531+
/// non-zero value, a warning will be logged once per memory pool when
532+
/// allocations cause the pool to exceed this threshold. This is useful for
533+
/// identifying memory usage patterns during debugging. A value of
534+
/// 0 means no warning threshold is enforced.
535+
static constexpr const char* kDebugMemoryPoolWarnThresholdBytes =
536+
"debug_memory_pool_warn_threshold_bytes";
537+
530538
/// Some lambda functions over arrays and maps are evaluated in batches of the
531539
/// underlying elements that comprise the arrays/maps. This is done to make
532540
/// the batch size managable as array vectors can have thousands of elements
@@ -692,6 +700,12 @@ class QueryConfig {
692700
return get<std::string>(kDebugMemoryPoolNameRegex, "");
693701
}
694702

703+
uint64_t debugMemoryPoolWarnThresholdBytes() const {
704+
return config::toCapacity(
705+
get<std::string>(kDebugMemoryPoolWarnThresholdBytes, "0B"),
706+
config::CapacityUnit::BYTE);
707+
}
708+
695709
std::optional<uint32_t> debugAggregationApproxPercentileFixedRandomSeed()
696710
const {
697711
return get<uint32_t>(kDebugAggregationApproxPercentileFixedRandomSeed);

0 commit comments

Comments
 (0)