[Darwin] MTRDevice should throttle writes to the attribute storage

jtung-apple · jtung-apple · commit 8c0085a6971f · 2024-05-21T13:14:26.000-07:00
diff --git a/src/darwin/Framework/CHIP/MTRDevice.mm b/src/darwin/Framework/CHIP/MTRDevice.mm
@@ -419,6 +419,46 @@ @implementation MTRDevice {
     // Tracking of initial subscribe latency.  When _initialSubscribeStart is
     // nil, we are not tracking the latency.
     NSDate * _Nullable _initialSubscribeStart;
+
+    //
+
+    // Each time a report comes in, MTRDevice will wait _reportToPersistenceDelayTime before persisting
+    // the changes to storage. If another report comes in during this interval, MTRDevice will wait
+    // another _reportToPersistenceDelayTime interval, until _reportToPersistenceDelayTimeMax is reached,
+    // at which point all the changes so far will be written to storage.
+    //
+    // MTRDevice will also track a fixed number (_recentReportTimesMaxCount) of report times. If the
+    // running average time between reports dips below _timeBetweenReportsTooShortThreshold, a portion
+    // of the _reportToPersistenceDalayMaxMultiplier will be applied to both the
+    // _reportToPersistenceDelayTime and _reportToPersistenceDelayTimeMax. The multiplier will reach the
+    // max when the average time between reports reaches _timeBetweenReportsTooShortMinThreshold.
+    //
+    // When the running average time between reports dips below _timeBetweenReportsTooShortMinThreshold
+    // for the first time, the time will be noted. If the device remains in this state for longer than
+    // _deviceReportingExcessivelyIntervalThreshold, persistence will stop until the average time
+    // between reports goes back above _timeBetweenReportsTooShortMinThreshold.
+    NSDate * _Nullable _clusterDataPersistenceFirstScheduledTime;
+    NSMutableArray<NSDate *> * _mostRecentReportTimes;
+    NSDate * _Nullable _deviceReportingExcessivelyStartTime;
+    double _reportToPersistenceDalayCurrentMultiplier;
+
+    // Threshold values and limits for the above, so they can be tested
+    NSTimeInterval _reportToPersistenceDelayTime;
+    NSTimeInterval _reportToPersistenceDelayTimeMax;
+    NSUInteger _recentReportTimesMaxCount;
+    NSTimeInterval _timeBetweenReportsTooShortThreshold;
+    NSTimeInterval _timeBetweenReportsTooShortMinThreshold;
+    double _reportToPersistenceDalayMaxMultiplier;
+    NSTimeInterval _deviceReportingExcessivelyIntervalThreshold;
+
+    // Default values for the thresholds and limits
+#define kReportToPersistenceDelayTimeDefault (15)
+#define kReportToPersistenceDelayTimeMaxDefault (20 * kReportToPersistenceDelayTimeDefault)
+#define kRecentReportTimesMaxCountDefault (12)
+#define kTimeBetweenReportsTooShortThresholdDefault (15)
+#define kTimeBetweenReportsTooShortMinThresholdDefault (5)
+#define kReportToPersistenceDelayMaxMultiplierDefault (10)
+#define kDeviceReportingExcessivelyIntervalThresholdDefault (5 * 60)
 }
 
 - (instancetype)initWithNodeID:(NSNumber *)nodeID controller:(MTRDeviceController *)controller
@@ -442,6 +482,18 @@ - (instancetype)initWithNodeID:(NSNumber *)nodeID controller:(MTRDeviceControlle
         }
         _clusterDataToPersist = nil;
         _persistedClusters = [NSMutableSet set];
+
+        _clusterDataPersistenceFirstScheduledTime = nil;
+        _mostRecentReportTimes = [NSMutableArray array];
+        _deviceReportingExcessivelyStartTime = nil;
+        _reportToPersistenceDalayCurrentMultiplier = 1;
+        _reportToPersistenceDelayTime = kReportToPersistenceDelayTimeDefault;
+        _reportToPersistenceDelayTimeMax = kReportToPersistenceDelayTimeMaxDefault;
+        _recentReportTimesMaxCount = kRecentReportTimesMaxCountDefault;
+        _timeBetweenReportsTooShortThreshold = kTimeBetweenReportsTooShortThresholdDefault;
+        _timeBetweenReportsTooShortMinThreshold = kTimeBetweenReportsTooShortMinThresholdDefault;
+        _reportToPersistenceDalayMaxMultiplier = kReportToPersistenceDelayMaxMultiplierDefault;
+        _deviceReportingExcessivelyIntervalThreshold = kDeviceReportingExcessivelyIntervalThresholdDefault;
         MTR_LOG_INFO("%@ init with hex nodeID 0x%016llX", self, _nodeID.unsignedLongLongValue);
     }
     return self;
@@ -1287,37 +1339,162 @@ - (void)_handleReportBegin
     return clusterDataToReturn;
 }
 
+// Returns _reportToPersistenceDelayTime scaled by the current throttling multiplier
+// (_reportToPersistenceDalayCurrentMultiplier).
+- (NSTimeInterval)_reportToPersistenceDelayTimeAfterMutiplier
+{
+    NSTimeInterval scaledDelay = _reportToPersistenceDalayCurrentMultiplier * _reportToPersistenceDelayTime;
+    return scaledDelay;
+}
+
+// Returns _reportToPersistenceDelayTimeMax scaled by the current throttling multiplier
+// (_reportToPersistenceDalayCurrentMultiplier).
+- (NSTimeInterval)_reportToPersistenceDelayTimeMaxAfterMutiplier
+{
+    NSTimeInterval scaledMaxDelay = _reportToPersistenceDalayCurrentMultiplier * _reportToPersistenceDelayTimeMax;
+    return scaledMaxDelay;
+}
+
+// Delayed callback dispatched by _scheduleClusterDataPersistence. Acquires _lock, then writes the
+// pending cluster data to the controller data store, unless a report arrived within the current
+// (multiplied) report-to-persistence delay and the (multiplied) max delay has not elapsed yet.
+- (void)_persistClusterDataAsNeeded
+{
+    std::lock_guard lock(_lock);
+
+    // Bail out if there is no persisted data (lack of controller data store) or nothing is pending.
+    if (!_persistedClusterData || !_clusterDataToPersist.count) {
+        return;
+    }
+
+    NSDate * newestReportTime = _mostRecentReportTimes.lastObject;
+    NSTimeInterval secondsSinceNewestReport = -[newestReportTime timeIntervalSinceNow];
+    BOOL reportArrivedAfterScheduling = secondsSinceNewestReport < [self _reportToPersistenceDelayTimeAfterMutiplier];
+    if (reportArrivedAfterScheduling) {
+        // Pending data implies a first-scheduled time was recorded; without it the max delay
+        // cannot be evaluated.
+        if (_clusterDataPersistenceFirstScheduledTime == nil) {
+            MTR_LOG_ERROR("%@ _persistClusterDataAsNeeded expects _clusterDataPersistenceFirstScheduledTime if _clusterDataToPersist exists", self);
+            return;
+        }
+
+        NSTimeInterval secondsSinceFirstSchedule = -[_clusterDataPersistenceFirstScheduledTime timeIntervalSinceNow];
+        BOOL maxDelayStillPending = secondsSinceFirstSchedule < [self _reportToPersistenceDelayTimeMaxAfterMutiplier];
+        if (maxDelayStillPending) {
+            // Neither delay has elapsed - keep accumulating changes.
+            return;
+        }
+    }
+
+    // Reaching this point means data is pending and either the report-to-persistence delay or
+    // the max delay has elapsed, so write everything out now.
+    MTR_LOG_DEFAULT("%@ Storing cluster information (data version and attributes) count: %lu", self, static_cast<unsigned long>(_clusterDataToPersist.count));
+
+    // The storage implementation will read these MTRDeviceClusterData objects later, so hand it
+    // a snapshot of the current state rather than the live objects.
+    NSDictionary<MTRClusterPath *, MTRDeviceClusterData *> * snapshot = [self _clusterDataToPersistSnapshot];
+    [_deviceController.controllerDataStore storeClusterData:snapshot forNodeID:_nodeID];
+
+    // Mirror what was just stored into the in-memory persisted state.
+    for (MTRClusterPath * path in _clusterDataToPersist) {
+        MTRDeviceClusterData * pendingData = _clusterDataToPersist[path];
+        [_persistedClusterData setObject:pendingData forKey:path];
+        [_persistedClusters addObject:path];
+    }
+
+    // TODO: There is one edge case not handled well here: if the
+    // storeClusterData call above fails somehow, and then the data gets
+    // evicted from _persistedClusterData, we could end up in a situation
+    // where when we page things in from storage we have stale values and
+    // hence effectively lose the delta that we failed to persist.
+    //
+    // The only way to handle this would be to detect it when it happens,
+    // then re-subscribe at that point, which would cause the relevant data
+    // to be sent to us via the priming read.
+    _clusterDataToPersist = nil;
+
+    // Reset so the next report starts a fresh max-delay window.
+    _clusterDataPersistenceFirstScheduledTime = nil;
+}
+
+// Notes the arrival of a report, refreshes the throttling state derived from the recent report
+// times, and dispatches a delayed call to _persistClusterDataAsNeeded on self.queue.
+//
+// Must be called with _lock held (asserted below).
+//
+// State updated here:
+//  - _clusterDataPersistenceFirstScheduledTime: set if not already set, to track the max delay.
+//  - _mostRecentReportTimes: appended to, and trimmed to _recentReportTimesMaxCount entries.
+//  - _reportToPersistenceDalayCurrentMultiplier: recomputed from the running average interval.
+//  - _deviceReportingExcessivelyStartTime: set when the running average first dips below
+//    _timeBetweenReportsTooShortMinThreshold, cleared when it is no longer below it.
+//
+// Nothing is dispatched when the device has been reporting excessively for longer than
+// _deviceReportingExcessivelyIntervalThreshold.
+- (void)_scheduleClusterDataPersistence
+{
+    os_unfair_lock_assert_owner(&self->_lock);
+
+    // No persisted data / lack of controller data store
+    if (!_persistedClusterData) {
+        return;
+    }
+
+    // Nothing to persist
+    if (!_clusterDataToPersist.count) {
+        return;
+    }
+
+    // Use one timestamp for everything recorded about this report, so the values agree.
+    NSDate * now = [NSDate now];
+
+    // Mark when first report comes in to know when _reportToPersistenceDelayTimeMax is hit
+    if (!_clusterDataPersistenceFirstScheduledTime) {
+        _clusterDataPersistenceFirstScheduledTime = now;
+    }
+
+    // Make sure there is space in the array, and note report time. The count > 0 check keeps
+    // removeObjectAtIndex: from being called on an empty array if _recentReportTimesMaxCount
+    // is configured to 0.
+    while (_mostRecentReportTimes.count > 0 && _mostRecentReportTimes.count >= _recentReportTimesMaxCount) {
+        [_mostRecentReportTimes removeObjectAtIndex:0];
+    }
+    [_mostRecentReportTimes addObject:now];
+
+    // Calculate running average and update multiplier. This only happens once more than two
+    // report times (at least two intervals) have been recorded.
+    // NOTE(review): a single interval (two report times) would be enough to compute an average;
+    // confirm that requiring more than two is intended.
+    if (_mostRecentReportTimes.count > 2) {
+        NSTimeInterval cumulativeIntervals = 0;
+        for (NSUInteger i = 1; i < _mostRecentReportTimes.count; i++) {
+            NSDate * lastDate = _mostRecentReportTimes[i - 1];
+            NSDate * currentDate = _mostRecentReportTimes[i];
+            NSTimeInterval intervalSinceLastReport = [currentDate timeIntervalSinceDate:lastDate];
+            // Check to guard against clock change
+            if (intervalSinceLastReport > 0) {
+                cumulativeIntervals += intervalSinceLastReport;
+            }
+        }
+        NSTimeInterval averageTimeBetweenReports = cumulativeIntervals / (_mostRecentReportTimes.count - 1);
+
+        if (averageTimeBetweenReports < _timeBetweenReportsTooShortThreshold) {
+            // Multiplier goes from 1 to _reportToPersistenceDalayMaxMultiplier uniformly, as
+            // averageTimeBetweenReports go from _timeBetweenReportsTooShortThreshold to
+            // _timeBetweenReportsTooShortMinThreshold
+
+            double intervalAmountBelowThreshold = _timeBetweenReportsTooShortThreshold - averageTimeBetweenReports;
+            double thresholdRange = _timeBetweenReportsTooShortThreshold - _timeBetweenReportsTooShortMinThreshold;
+
+            // Default to 100%. If the thresholds are configured equal or inverted, there is no
+            // range to interpolate over and the average is already below both, so the max
+            // multiplier applies. This also avoids dividing by zero or a negative range.
+            double proportionTowardMinThreshold = 1;
+            if (thresholdRange > 0) {
+                // Clamp to 100%
+                proportionTowardMinThreshold = MIN(1.0, intervalAmountBelowThreshold / thresholdRange);
+            }
+
+            // Set current multiplier to [1, MaxMultiplier]
+            _reportToPersistenceDalayCurrentMultiplier = 1 + (proportionTowardMinThreshold * (_reportToPersistenceDalayMaxMultiplier - 1));
+        } else {
+            _reportToPersistenceDalayCurrentMultiplier = 1;
+        }
+
+        // Also note when the running average first dips below the min threshold
+        if (averageTimeBetweenReports < _timeBetweenReportsTooShortMinThreshold) {
+            if (!_deviceReportingExcessivelyStartTime) {
+                _deviceReportingExcessivelyStartTime = now;
+                MTR_LOG_DEFAULT("%@ device is reporting excessively: average time between reports %.3f is below min threshold %.3f", self, averageTimeBetweenReports, _timeBetweenReportsTooShortMinThreshold);
+            }
+        } else {
+            _deviceReportingExcessivelyStartTime = nil;
+        }
+    }
+
+    // Do not schedule persistence if device has been reporting excessively for too long
+    if (_deviceReportingExcessivelyStartTime) {
+        NSTimeInterval intervalSinceDeviceReportingExcessively = [now timeIntervalSinceDate:_deviceReportingExcessivelyStartTime];
+        if (intervalSinceDeviceReportingExcessively > _deviceReportingExcessivelyIntervalThreshold) {
+            return;
+        }
+    }
+
+    // _persistClusterDataAsNeeded takes _lock itself and re-checks whether enough time has passed.
+    dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t) ([self _reportToPersistenceDelayTimeAfterMutiplier] * NSEC_PER_SEC)), self.queue, ^{
+        [self _persistClusterDataAsNeeded];
+    });
+}
+
 - (void)_handleReportEnd
 {
     std::lock_guard lock(_lock);
     _receivingReport = NO;
     _receivingPrimingReport = NO;
     _estimatedStartTimeFromGeneralDiagnosticsUpTime = nil;
 
-    BOOL dataStoreExists = _deviceController.controllerDataStore != nil;
-    if (dataStoreExists && _clusterDataToPersist != nil && _clusterDataToPersist.count) {
-        MTR_LOG_DEFAULT("%@ Storing cluster information (data version and attributes) count: %lu", self, static_cast<unsigned long>(_clusterDataToPersist.count));
-        // We're going to hand out these MTRDeviceClusterData objects to our
-        // storage implementation, which will try to read them later.  Make sure
-        // we snapshot the state here instead of handing out live copies.
-        NSDictionary<MTRClusterPath *, MTRDeviceClusterData *> * clusterData = [self _clusterDataToPersistSnapshot];
-        [_deviceController.controllerDataStore storeClusterData:clusterData forNodeID:_nodeID];
-        for (MTRClusterPath * clusterPath in _clusterDataToPersist) {
-            [_persistedClusterData setObject:_clusterDataToPersist[clusterPath] forKey:clusterPath];
-            [_persistedClusters addObject:clusterPath];
-        }
-
-        // TODO: There is one edge case not handled well here: if the
-        // storeClusterData call above fails somehow, and then the data gets
-        // evicted from _persistedClusterData, we could end up in a situation
-        // where when we page things in from storage we have stale values and
-        // hence effectively lose the delta that we failed to persist.
-        //
-        // The only way to handle this would be to detect it when it happens,
-        // then re-subscribe at that point, which would cause the relevant data
-        // to be sent to us via the priming read.
-        _clusterDataToPersist = nil;
-    }
+    [self _scheduleClusterDataPersistence];
 
     // After the handling of the report, if we detected a device configuration change, notify the delegate
     // of the same.