@@ -35,7 +35,8 @@ CResourceMonitor::CResourceMonitor(double byteLimitMargin)
3535 m_HasPruningStarted(false ), m_PruneThreshold(0 ), m_LastPruneTime(0 ),
3636 m_PruneWindow(std::numeric_limits<std::size_t >::max()),
3737 m_PruneWindowMaximum(std::numeric_limits<std::size_t >::max()),
38- m_PruneWindowMinimum(std::numeric_limits<std::size_t >::max()), m_NoLimit(false ) {
38+ m_PruneWindowMinimum(std::numeric_limits<std::size_t >::max()),
39+ m_NoLimit(false ), m_CurrentBytesExceeded(0 ) {
3940 this ->updateMemoryLimitsAndPruneThreshold (DEFAULT_MEMORY_LIMIT_MB);
4041}
4142
@@ -108,18 +109,21 @@ void CResourceMonitor::refresh(CAnomalyDetector& detector) {
108109
109110void CResourceMonitor::forceRefresh (CAnomalyDetector& detector) {
110111 this ->memUsage (&detector);
111- core::CProgramCounters::counter (counter_t ::E_TSADMemoryUsage) = this ->totalMemory ();
112- LOG_TRACE (<< " Checking allocations: currently at " << this ->totalMemory ());
112+ // updateAllowAllocations() sets the E_TSADMemoryUsage counter and trace-logs the total.
113113 this ->updateAllowAllocations ();
114114}
115115
116116void CResourceMonitor::updateAllowAllocations () {
117117 std::size_t total{this ->totalMemory ()};
118+ core::CProgramCounters::counter (counter_t ::E_TSADMemoryUsage) = total;
119+ LOG_TRACE (<< " Checking allocations: currently at " << total);
118120 if (m_AllowAllocations) {
119121 if (total > this ->highLimit ()) {
120122 LOG_INFO (<< " Over current allocation high limit. " << total
121123 << " bytes used, the limit is " << this ->highLimit ());
122124 m_AllowAllocations = false ;
125+ std::size_t bytesExceeded{total - this ->highLimit ()};
126+ m_CurrentBytesExceeded = this ->adjustedUsage (bytesExceeded);
123127 }
124128 } else if (total < this ->lowLimit ()) {
125129 LOG_INFO (<< " Below current allocation low limit. " << total
@@ -204,13 +208,6 @@ bool CResourceMonitor::areAllocationsAllowed() const {
204208 return m_AllowAllocations;
205209}
206210
207- bool CResourceMonitor::areAllocationsAllowed (std::size_t size) const {
208- if (m_AllowAllocations) {
209- return this ->totalMemory () + size < this ->highLimit ();
210- }
211- return false ;
212- }
213-
214211std::size_t CResourceMonitor::allocationLimit () const {
215212 return this ->highLimit () - std::min (this ->highLimit (), this ->totalMemory ());
216213}
@@ -268,6 +265,9 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
268265 res.s_OverFields = 0 ;
269266 res.s_PartitionFields = 0 ;
270267 res.s_Usage = this ->totalMemory ();
268+ res.s_AdjustedUsage = this ->adjustedUsage (res.s_Usage );
269+ res.s_BytesMemoryLimit = 2 * m_ByteLimitHigh;
270+ res.s_BytesExceeded = m_CurrentBytesExceeded;
271271 res.s_AllocationFailures = 0 ;
272272 res.s_MemoryStatus = m_MemoryStatus;
273273 res.s_BucketStartTime = bucketStartTime;
@@ -281,6 +281,25 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
281281 return res;
282282}
283283
284+ std::size_t CResourceMonitor::adjustedUsage (std::size_t usage) const {
285+ // Reports 2 * usage / m_ByteLimitMargin, clamped to the std::size_t range.
286+ // Why double: background persist copies the models, which doubles the
287+ // memory size, and we may not get that memory back once the persist is done;
288+ // see https://github.com/elastic/x-pack-elasticsearch/issues/1020. Issue
289+ // https://github.com/elastic/x-pack-elasticsearch/issues/857 discusses a
290+ // foreground-only persist option: if that gets implemented, we should only
291+ // double when background persist is configured.
292+ // Why divide by the margin: the limit is scaled down by the margin during the
293+ // beginning period of the job's existence, so this gives the user a fairer
294+ // indication of how close the job is to hitting the model memory limit.
295+ // Doubling as a double avoids std::size_t wrap-around; the clamp avoids the
296+ // undefined out-of-range (or NaN) double to std::size_t conversion.
297+ const double scaled{2.0 * static_cast<double>(usage) / m_ByteLimitMargin};
298+ const std::size_t limit{std::numeric_limits<std::size_t>::max()};
299+ if (!(scaled > 0.0)) { return 0; }
300+ return scaled < static_cast<double>(limit) ? static_cast<std::size_t>(scaled) : limit;
301+ }
302+
284303void CResourceMonitor::acceptAllocationFailureResult (core_t ::TTime time) {
285304 m_MemoryStatus = model_t ::E_MemoryStatusHardLimit;
286305 ++m_AllocationFailures[time];
0 commit comments