diff --git a/src/cudamatrix/cu-allocator.cc b/src/cudamatrix/cu-allocator.cc index d1617bfedd4..d057a3f9ec1 100644 --- a/src/cudamatrix/cu-allocator.cc +++ b/src/cudamatrix/cu-allocator.cc @@ -223,6 +223,9 @@ void* CuMemoryAllocator::MallocFromSubregion(SubRegion *subregion, block->allocated = true; block->t = t_; allocated_block_map_[block->begin] = block; + allocated_memory_ += (block->end - block->begin); + if (allocated_memory_ > max_allocated_memory_) + max_allocated_memory_ = allocated_memory_; return block->begin; } @@ -359,7 +362,9 @@ void CuMemoryAllocator::PrintMemoryUsage() const { << tot_time_taken_ << "/" << malloc_time_taken_ << ", synchronized the GPU " << num_synchronizations_ << " times out of " << (t_/2) << " frees; " - << "device memory info: " << GetFreeGpuMemory(NULL, NULL); + << "device memory info: " << GetFreeGpuMemory(NULL, NULL) + << "; maximum allocated: " << max_allocated_memory_ + << "; current allocated: " << allocated_memory_; } // Note: we just initialize with the default options, but we can change it later @@ -370,7 +375,9 @@ CuMemoryAllocator::CuMemoryAllocator(): synchronize_gpu_t_(0), num_synchronizations_(0), tot_time_taken_(0.0), - malloc_time_taken_(0.0) { + malloc_time_taken_(0.0), + max_allocated_memory_(0), + allocated_memory_(0) { // Note: we don't allocate any memory regions at the start; we wait for the user // to call Malloc() or MallocPitch(), and then allocate one when needed. 
} @@ -413,6 +420,7 @@ void CuMemoryAllocator::Free(void *ptr) { << ptr; } MemoryBlock *block = iter->second; + allocated_memory_ -= (block->end - block->begin); allocated_block_map_.erase(iter); block->t = t_; block->thread_id = std::this_thread::get_id(); diff --git a/src/cudamatrix/cu-allocator.h b/src/cudamatrix/cu-allocator.h index 9dd2bb82aea..3028be2b576 100644 --- a/src/cudamatrix/cu-allocator.h +++ b/src/cudamatrix/cu-allocator.h @@ -335,6 +335,11 @@ class CuMemoryAllocator { // this is only locked by the '*Locking' versions of the functions (necessary only // in multi-threaded applications). std::mutex mutex_; + + // Track the number of bytes currently allocated from the cache, and the + // maximum ever allocated, so the application's peak memory usage can be reported. + size_t max_allocated_memory_; + size_t allocated_memory_; };