Skip to content

Commit c67684b

Browse files
authored
Fix a deadlock in EventSource and CounterGroup (#40259)
* Fix a deadlock in CounterGroup and EventSource * add more comments * Add some more comments * PR feedback * Add comments
1 parent 75495b7 commit c67684b

File tree

1 file changed

+46
-10
lines changed
  • src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing

1 file changed

+46
-10
lines changed

src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/CounterGroup.cs

+46-10
Original file line numberDiff line numberDiff line change
@@ -200,22 +200,47 @@ private void ResetCounters()
200200

201201
private void OnTimer()
202202
{
203-
Debug.Assert(Monitor.IsEntered(s_counterGroupLock));
204203
if (_eventSource.IsEnabled())
205204
{
206-
DateTime now = DateTime.UtcNow;
207-
TimeSpan elapsed = now - _timeStampSinceCollectionStarted;
205+
DateTime now;
206+
TimeSpan elapsed;
207+
int pollingIntervalInMilliseconds;
208+
DiagnosticCounter[] counters;
209+
lock (s_counterGroupLock)
210+
{
211+
now = DateTime.UtcNow;
212+
elapsed = now - _timeStampSinceCollectionStarted;
213+
pollingIntervalInMilliseconds = _pollingIntervalInMilliseconds;
214+
counters = new DiagnosticCounter[_counters.Count];
215+
_counters.CopyTo(counters);
216+
}
208217

209-
foreach (DiagnosticCounter counter in _counters)
218+
// MUST be kept out of the scope of s_counterGroupLock because the WritePayload
219+
// callback can be re-entrant to CounterGroup (i.e. it's possible it calls back into EnableTimer()
220+
// above, since WritePayload callback can contain user code that can invoke EventSource constructor
221+
// and lead to a deadlock). (See https://github.com/dotnet/runtime/issues/40190 for details)
222+
foreach (DiagnosticCounter counter in counters)
210223
{
211-
counter.WritePayload((float)elapsed.TotalSeconds, _pollingIntervalInMilliseconds);
224+
// NOTE: It is still possible for a race condition to occur here. An example is if the session
225+
// that subscribed to this batch of counters was disabled and it was immediately enabled in
226+
// a different session, some of the counter data that was supposed to be written to the old
227+
// session can now "overflow" into the new session.
228+
// This problem predates this change (when we used to hold the lock during the call to WritePayload):
229+
// the only difference being the old behavior caused the entire batch of counters to be either
230+
// written to the old session or the new session. The behavior change is not being treated as a
231+
// significant problem to address for now, but we can come back and address it if it turns out to
232+
// be an actual issue.
233+
counter.WritePayload((float)elapsed.TotalSeconds, pollingIntervalInMilliseconds);
212234
}
213-
_timeStampSinceCollectionStarted = now;
214235

215-
do
236+
lock (s_counterGroupLock)
216237
{
217-
_nextPollingTimeStamp += new TimeSpan(0, 0, 0, 0, _pollingIntervalInMilliseconds);
218-
} while (_nextPollingTimeStamp <= now);
238+
_timeStampSinceCollectionStarted = now;
239+
do
240+
{
241+
_nextPollingTimeStamp += new TimeSpan(0, 0, 0, 0, _pollingIntervalInMilliseconds);
242+
} while (_nextPollingTimeStamp <= now);
243+
}
219244
}
220245
}
221246

@@ -228,8 +253,15 @@ private void OnTimer()
228253
private static void PollForValues()
229254
{
230255
AutoResetEvent? sleepEvent = null;
256+
257+
// Cache of onTimer callbacks for each CounterGroup.
258+
// We collect these so they can be invoked outside of the scope of s_counterGroupLock because
259+
// calling into the callbacks can cause a re-entrancy into CounterGroup.Enable()
260+
// and result in a deadlock. (See https://github.com/dotnet/runtime/issues/40190 for details)
261+
List<Action> onTimers = new List<Action>();
231262
while (true)
232263
{
264+
onTimers.Clear();
233265
int sleepDurationInMilliseconds = int.MaxValue;
234266
lock (s_counterGroupLock)
235267
{
@@ -239,14 +271,18 @@ private static void PollForValues()
239271
DateTime now = DateTime.UtcNow;
240272
if (counterGroup._nextPollingTimeStamp < now + new TimeSpan(0, 0, 0, 0, 1))
241273
{
242-
counterGroup.OnTimer();
274+
onTimers.Add(() => counterGroup.OnTimer());
243275
}
244276

245277
int millisecondsTillNextPoll = (int)((counterGroup._nextPollingTimeStamp - now).TotalMilliseconds);
246278
millisecondsTillNextPoll = Math.Max(1, millisecondsTillNextPoll);
247279
sleepDurationInMilliseconds = Math.Min(sleepDurationInMilliseconds, millisecondsTillNextPoll);
248280
}
249281
}
282+
foreach (Action onTimer in onTimers)
283+
{
284+
onTimer.Invoke();
285+
}
250286
if (sleepDurationInMilliseconds == int.MaxValue)
251287
{
252288
sleepDurationInMilliseconds = -1; // WaitOne uses -1 to mean infinite

0 commit comments

Comments
 (0)