From f373f1a52445013bebc2ade452955071ac04e390 Mon Sep 17 00:00:00 2001 From: Chris Lange Date: Mon, 11 May 2026 19:45:55 -0600 Subject: [PATCH 1/6] fix(device): serialize ExecuteTextCommandAsync (closes #186) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a SemaphoreSlim around ExecuteTextCommandAsync so concurrent callers (e.g. simultaneous GetSdCardFilesAsync + DrainErrorQueueAsync + GetSystemInfoAsync) no longer race the protobuf-consumer pause/swap/restart sequence on the same stream. Without the lock, two callers could: - Spawn separate text consumers on the SAME underlying stream — each gets a random subset of the device's reply lines. - Interleave SCPI bytes on the wire — firmware sees intermixed bytes and may reply with parse errors. - Race the protobuf-consumer restart in the outer finally block. - Stop a consumer mid-restart (one caller's StopSafely targets the consumer the other caller just restarted). Implementation: - New `_textExchangeLock = new SemaphoreSlim(1, 1)` for mutual exclusion. Disposed in Dispose(). - New `_textExchangeOwnerThreadId` tracks the holding thread. A same-thread re-entrant call (a setupAction that itself calls ExecuteTextCommandAsync) would corrupt the consumer swap; we detect it BEFORE WaitAsync() and throw InvalidOperationException ("not re-entrant"). Strictly safer than the silent deadlock WaitAsync would otherwise produce. - ALL validation moved INSIDE the lock so a competing thread calling DisconnectAsync() / Dispose() during our WaitAsync() doesn't leave us with a stale _transport / _messageConsumer reference (TOCTOU close). - New disposed/disconnecting guard at the top of the locked body rejects calls during teardown — even before the existing IsConnected / transport-null checks. - Owner cleared and semaphore Released in the outer finally so validation failures can't leak the lock (verified in tests). 5 new tests in DaqifiDeviceTextCommandLockTests: - Same-thread owner → InvalidOperationException("not re-entrant") - _isDisconnecting=true → throws "disposing or disconnecting" - _disposed=true → throws "disposing or disconnecting" - Lock released after validation failure (second call doesn't deadlock) - Owner thread ID cleared after method returns (even on exception) Tests use reflection to set the relevant private fields — those guards run before any transport/consumer interaction, so this gives faithful coverage without standing up a real transport stack. Mirrors the equivalent fix already merged in daqifi-python-core PR #104 (execute_text_command + _text_exchange_lock + RLock + disposed/disconnecting guard + post-acquisition validation). 895 tests pass on net9.0 + net10.0 (was 890 baseline). --- .../DaqifiDeviceTextCommandLockTests.cs | 145 ++++++++++++++++++ src/Daqifi.Core/Device/DaqifiDevice.cs | 79 ++++++++-- 2 files changed, 211 insertions(+), 13 deletions(-) create mode 100644 src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs diff --git a/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs b/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs new file mode 100644 index 0000000..22bd406 --- /dev/null +++ b/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs @@ -0,0 +1,145 @@ +using System; +using System.Net; +using System.Reflection; +using System.Threading.Tasks; +using Daqifi.Core.Device; +using Xunit; + +namespace Daqifi.Core.Tests.Device +{ + /// + /// Tests for #186 — ExecuteTextCommandAsync must serialize concurrent + /// callers (SemaphoreSlim), reject same-thread re-entrant calls + /// (InvalidOperationException, not deadlock), and reject calls when + /// the device is disposed or disconnecting. + /// + /// The protected method is exercised via a thin subclass that exposes + /// it. Because spinning up a real transport here would be fragile, the + /// pre-lock re-entrancy guard and the disposed/disconnecting guard are + /// tested by setting the relevant private fields via reflection — those + /// guards run before any transport / consumer interaction, so this gives + /// faithful coverage without a transport stack. + /// + public class DaqifiDeviceTextCommandLockTests + { + [Fact] + public async Task ExecuteTextCommandAsync_WhenSameThreadAlreadyOwnsLock_ThrowsInvalidOperation() + { + var device = new TextCommandTestableDevice("TestDevice"); + + // Simulate "we're already inside ExecuteTextCommandAsync on this + // thread" by directly setting the owner-thread tracker. The + // re-entrancy guard runs before WaitAsync(), so this check + // fires immediately without touching any transport state. + SetOwnerThreadId(device, Environment.CurrentManagedThreadId); + + var ex = await Assert.ThrowsAsync( + () => device.CallExecuteTextCommandAsync(() => { })); + Assert.Contains("not re-entrant", ex.Message); + } + + [Fact] + public async Task ExecuteTextCommandAsync_WhenDisposing_ThrowsInvalidOperation() + { + var device = new TextCommandTestableDevice("TestDevice"); + SetIsDisconnecting(device, true); + + var ex = await Assert.ThrowsAsync( + () => device.CallExecuteTextCommandAsync(() => { })); + Assert.Contains("disposing or disconnecting", ex.Message); + } + + [Fact] + public async Task ExecuteTextCommandAsync_WhenDisposed_ThrowsInvalidOperation() + { + var device = new TextCommandTestableDevice("TestDevice"); + SetDisposed(device, true); + + var ex = await Assert.ThrowsAsync( + () => device.CallExecuteTextCommandAsync(() => { })); + Assert.Contains("disposing or disconnecting", ex.Message); + } + + [Fact] + public async Task ExecuteTextCommandAsync_ReleasesLockAfterValidationFailure() + { + // After a validation failure (e.g. not connected), the lock + // must be released so subsequent calls don't hang. Verified + // by calling twice — second call must reach validation too, + // not block on WaitAsync. + var device = new TextCommandTestableDevice("TestDevice"); + + await Assert.ThrowsAsync( + () => device.CallExecuteTextCommandAsync(() => { })); + // Second call: also throws, but ONLY if the lock was released. + // If the lock leaked, this would deadlock and pytest's per-test + // budget would time it out instead. + await Assert.ThrowsAsync( + () => device.CallExecuteTextCommandAsync(() => { })); + } + + [Fact] + public async Task ExecuteTextCommandAsync_OwnerThreadIdClearedAfterReturn() + { + // Even when the call throws, the owner-thread tracker is + // cleared in the finally block so a subsequent call from the + // same thread doesn't false-positive the re-entrancy check. + var device = new TextCommandTestableDevice("TestDevice"); + + await Assert.ThrowsAsync( + () => device.CallExecuteTextCommandAsync(() => { })); + + Assert.Null(GetOwnerThreadId(device)); + } + + // ── Reflection helpers — kept private to this test class so the + // production DaqifiDevice doesn't have to expose internals. ───── + + private static void SetOwnerThreadId(DaqifiDevice device, int? value) + { + typeof(DaqifiDevice) + .GetField("_textExchangeOwnerThreadId", BindingFlags.Instance | BindingFlags.NonPublic)! + .SetValue(device, value); + } + + private static int? GetOwnerThreadId(DaqifiDevice device) + { + return (int?)typeof(DaqifiDevice) + .GetField("_textExchangeOwnerThreadId", BindingFlags.Instance | BindingFlags.NonPublic)! + .GetValue(device); + } + + private static void SetIsDisconnecting(DaqifiDevice device, bool value) + { + typeof(DaqifiDevice) + .GetField("_isDisconnecting", BindingFlags.Instance | BindingFlags.NonPublic)! + .SetValue(device, value); + } + + private static void SetDisposed(DaqifiDevice device, bool value) + { + typeof(DaqifiDevice) + .GetField("_disposed", BindingFlags.Instance | BindingFlags.NonPublic)! + .SetValue(device, value); + } + + /// + /// Subclass that exposes the protected ExecuteTextCommandAsync via + /// a public wrapper so tests can call it directly. Does NOT override + /// it — the real method runs, including the lock + guards. + /// + private class TextCommandTestableDevice : DaqifiDevice + { + public TextCommandTestableDevice(string name, IPAddress? ipAddress = null) + : base(name, ipAddress) + { + } + + public Task> CallExecuteTextCommandAsync( + Action setupAction) + { + return ExecuteTextCommandAsync(setupAction, responseTimeoutMs: 100, completionTimeoutMs: 50); + } + } + } +} diff --git a/src/Daqifi.Core/Device/DaqifiDevice.cs b/src/Daqifi.Core/Device/DaqifiDevice.cs index 62963bc..3a84289 100644 --- a/src/Daqifi.Core/Device/DaqifiDevice.cs +++ b/src/Daqifi.Core/Device/DaqifiDevice.cs @@ -80,6 +80,25 @@ public class DaqifiDevice : IDevice, IDisposable private bool _isDisconnecting; private bool _isInitialized; private readonly List _channels = new(); + + // Serializes ExecuteTextCommandAsync calls device-wide (closes #186). + // Multiple callers — e.g. concurrent GetSdCardFilesAsync / + // DrainErrorQueueAsync / GetSystemInfoAsync — would otherwise race the + // protobuf-consumer pause/swap/restart sequence on the same stream and + // either intermix SCPI bytes on the wire or interleave reply lines + // between callers' returned lists. SemaphoreSlim chosen over Lock + // because the method is async; counter is (1, 1) for mutual exclusion. + private readonly SemaphoreSlim _textExchangeLock = new(1, 1); + + // Thread that currently holds _textExchangeLock, or null when free. + // Lets ExecuteTextCommandAsync detect a same-thread re-entrant call + // (a setupAction that itself calls back into ExecuteTextCommandAsync) + // and throw InvalidOperationException instead of deadlocking on + // _textExchangeLock.WaitAsync(). Same safety guarantee — the + // re-entrant call would corrupt the consumer swap; better failure + // mode for callers (clean exception, stack trace) than a hung + // process (closes #186 follow-up). + private int? _textExchangeOwnerThreadId; /// /// Gets the current connection status of the device. @@ -328,26 +347,53 @@ protected virtual async Task> ExecuteTextCommandAsync( if (completionTimeoutMs <= 0) throw new ArgumentOutOfRangeException(nameof(completionTimeoutMs), completionTimeoutMs, "Timeout must be positive."); - var sw = Stopwatch.StartNew(); + cancellationToken.ThrowIfCancellationRequested(); - if (!IsConnected) + // Same-thread re-entrancy detection: a setupAction that calls + // ExecuteTextCommandAsync on the same device would corrupt the + // consumer swap mid-flight. Surface as a clean exception rather + // than wedging on _textExchangeLock.WaitAsync() forever. + var currentTid = Environment.CurrentManagedThreadId; + if (_textExchangeOwnerThreadId == currentTid) { - throw new InvalidOperationException("Device is not connected."); + throw new InvalidOperationException( + "ExecuteTextCommandAsync is not re-entrant on the same device; " + + "do not call it from inside a setupAction callback."); } - if (_transport == null) + await _textExchangeLock.WaitAsync(cancellationToken).ConfigureAwait(false); + _textExchangeOwnerThreadId = currentTid; + try { - throw new InvalidOperationException("ExecuteTextCommandAsync requires a transport-based connection."); - } + // All validation runs INSIDE the lock so a competing thread + // calling DisconnectAsync() / Dispose() while we're blocked + // on WaitAsync() doesn't leave us with a stale _transport / + // _messageConsumer reference (closes the TOCTOU window + // documented in #186). + if (_disposed || _isDisconnecting) + { + throw new InvalidOperationException( + "ExecuteTextCommandAsync cannot run while the device is " + + "disposing or disconnecting."); + } - cancellationToken.ThrowIfCancellationRequested(); + if (!IsConnected) + { + throw new InvalidOperationException("Device is not connected."); + } + + if (_transport == null) + { + throw new InvalidOperationException("ExecuteTextCommandAsync requires a transport-based connection."); + } - var collectedLines = new List(); - var stream = _transport.Stream; - int? originalReadTimeout = null; + var sw = Stopwatch.StartNew(); + var collectedLines = new List(); + var stream = _transport.Stream; + int? originalReadTimeout = null; - try - { + try + { if (stream.CanTimeout) { try @@ -468,7 +514,13 @@ protected virtual async Task> ExecuteTextCommandAsync( Trace.WriteLine($"[ExecuteTextCommandAsync] Total elapsed: {sw.ElapsedMilliseconds}ms"); } - return collectedLines; + return collectedLines; + } + finally + { + _textExchangeOwnerThreadId = null; + _textExchangeLock.Release(); + } } /// @@ -595,6 +647,7 @@ public void Dispose() _messageConsumer?.Dispose(); _messageProducer?.Dispose(); _transport?.Dispose(); + _textExchangeLock.Dispose(); _disposed = true; } } From 328e212e9b1c0b72e0a6c2a51d79271b9d43f764 Mon Sep 17 00:00:00 2001 From: Chris Lange Date: Mon, 11 May 2026 20:17:20 -0600 Subject: [PATCH 2/6] fix: Apply Qodo /agentic_review pass 1: harden ExecuteTextCommandAsync concurrency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three concurrency findings from Qodo on the lock-coordination work: 1. Re-entrancy guard could miss across async hops. The previous design captured Environment.CurrentManagedThreadId before await, but after ConfigureAwait(false) the resumed thread may differ — a setupAction callback re-entering ExecuteTextCommandAsync could then deadlock on _textExchangeLock.WaitAsync() instead of throwing. Replaced the int? owner-thread tracker with AsyncLocal, which flows through async resumptions regardless of which thread picks them up. 2. Disconnect() did not coordinate with the text-exchange lock. An in-flight ExecuteTextCommandAsync could be mid-swap (text consumer running, protobuf consumer not yet restarted) when Disconnect ripped the transport out from under it. Disconnect now waits up to 5s to acquire _textExchangeLock before tearing down. If the wait times out, teardown proceeds anyway — a stuck text exchange must not block disconnect forever. 3. Dispose() disposed _textExchangeLock while an in-flight call could still hold it; the in-flight call's finally Release() would then throw ObjectDisposedException and mask the real exception. Wrapped the Release in a try/catch so disposal-during-flight is treated as a benign teardown signal. Tests updated to flip the AsyncLocal flag instead of poking the removed int? field. 895/897 pass (2 skipped require live hardware). --- .../DaqifiDeviceTextCommandLockTests.cs | 54 ++++++------- src/Daqifi.Core/Device/DaqifiDevice.cs | 79 +++++++++++++++---- 2 files changed, 88 insertions(+), 45 deletions(-) diff --git a/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs b/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs index 22bd406..5673679 100644 --- a/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs +++ b/src/Daqifi.Core.Tests/Device/DaqifiDeviceTextCommandLockTests.cs @@ -1,6 +1,7 @@ using System; using System.Net; using System.Reflection; +using System.Threading; using System.Threading.Tasks; using Daqifi.Core.Device; using Xunit; @@ -9,29 +10,29 @@ namespace Daqifi.Core.Tests.Device { /// /// Tests for #186 — ExecuteTextCommandAsync must serialize concurrent - /// callers (SemaphoreSlim), reject same-thread re-entrant calls - /// (InvalidOperationException, not deadlock), and reject calls when - /// the device is disposed or disconnecting. + /// callers (SemaphoreSlim), reject re-entrant calls from the same + /// async flow (InvalidOperationException, not deadlock), and reject + /// calls when the device is disposed or disconnecting. /// /// The protected method is exercised via a thin subclass that exposes - /// it. Because spinning up a real transport here would be fragile, the - /// pre-lock re-entrancy guard and the disposed/disconnecting guard are - /// tested by setting the relevant private fields via reflection — those - /// guards run before any transport / consumer interaction, so this gives - /// faithful coverage without a transport stack. + /// it. The disposed/disconnecting guards are tested by setting the + /// relevant private fields via reflection — those guards run before + /// any transport / consumer interaction, so this gives faithful + /// coverage without a transport stack. Re-entrancy is tested by + /// flipping the AsyncLocal flag from inside the same logical flow. /// public class DaqifiDeviceTextCommandLockTests { [Fact] - public async Task ExecuteTextCommandAsync_WhenSameThreadAlreadyOwnsLock_ThrowsInvalidOperation() + public async Task ExecuteTextCommandAsync_WhenAlreadyInsideAsyncFlow_ThrowsInvalidOperation() { var device = new TextCommandTestableDevice("TestDevice"); // Simulate "we're already inside ExecuteTextCommandAsync on this - // thread" by directly setting the owner-thread tracker. The - // re-entrancy guard runs before WaitAsync(), so this check - // fires immediately without touching any transport state. - SetOwnerThreadId(device, Environment.CurrentManagedThreadId); + // async flow" by setting the AsyncLocal flag. The re-entrancy + // guard runs before WaitAsync(), so this check fires immediately + // without touching any transport state. + GetIsInsideTextExchange(device).Value = true; var ex = await Assert.ThrowsAsync( () => device.CallExecuteTextCommandAsync(() => { })); @@ -72,41 +73,34 @@ public async Task ExecuteTextCommandAsync_ReleasesLockAfterValidationFailure() await Assert.ThrowsAsync( () => device.CallExecuteTextCommandAsync(() => { })); // Second call: also throws, but ONLY if the lock was released. - // If the lock leaked, this would deadlock and pytest's per-test + // If the lock leaked, this would deadlock and xunit's per-test // budget would time it out instead. await Assert.ThrowsAsync( () => device.CallExecuteTextCommandAsync(() => { })); } [Fact] - public async Task ExecuteTextCommandAsync_OwnerThreadIdClearedAfterReturn() + public async Task ExecuteTextCommandAsync_AsyncLocalClearedAfterReturn() { - // Even when the call throws, the owner-thread tracker is - // cleared in the finally block so a subsequent call from the - // same thread doesn't false-positive the re-entrancy check. + // Even when the call throws, the AsyncLocal re-entrancy flag + // is cleared in the finally block so a subsequent call from + // the same flow doesn't false-positive the re-entrancy check. var device = new TextCommandTestableDevice("TestDevice"); await Assert.ThrowsAsync( () => device.CallExecuteTextCommandAsync(() => { })); - Assert.Null(GetOwnerThreadId(device)); + Assert.False(GetIsInsideTextExchange(device).Value); } // ── Reflection helpers — kept private to this test class so the // production DaqifiDevice doesn't have to expose internals. ───── - private static void SetOwnerThreadId(DaqifiDevice device, int? value) + private static AsyncLocal GetIsInsideTextExchange(DaqifiDevice device) { - typeof(DaqifiDevice) - .GetField("_textExchangeOwnerThreadId", BindingFlags.Instance | BindingFlags.NonPublic)! - .SetValue(device, value); - } - - private static int? GetOwnerThreadId(DaqifiDevice device) - { - return (int?)typeof(DaqifiDevice) - .GetField("_textExchangeOwnerThreadId", BindingFlags.Instance | BindingFlags.NonPublic)! - .GetValue(device); + return (AsyncLocal)typeof(DaqifiDevice) + .GetField("_isInsideTextExchange", BindingFlags.Instance | BindingFlags.NonPublic)! + .GetValue(device)!; } private static void SetIsDisconnecting(DaqifiDevice device, bool value) diff --git a/src/Daqifi.Core/Device/DaqifiDevice.cs b/src/Daqifi.Core/Device/DaqifiDevice.cs index 3a84289..dc4322c 100644 --- a/src/Daqifi.Core/Device/DaqifiDevice.cs +++ b/src/Daqifi.Core/Device/DaqifiDevice.cs @@ -90,15 +90,16 @@ public class DaqifiDevice : IDevice, IDisposable // because the method is async; counter is (1, 1) for mutual exclusion. private readonly SemaphoreSlim _textExchangeLock = new(1, 1); - // Thread that currently holds _textExchangeLock, or null when free. - // Lets ExecuteTextCommandAsync detect a same-thread re-entrant call - // (a setupAction that itself calls back into ExecuteTextCommandAsync) - // and throw InvalidOperationException instead of deadlocking on - // _textExchangeLock.WaitAsync(). Same safety guarantee — the - // re-entrant call would corrupt the consumer swap; better failure - // mode for callers (clean exception, stack trace) than a hung - // process (closes #186 follow-up). - private int? _textExchangeOwnerThreadId; + // Async-context flag that tracks whether the current logical flow + // already holds _textExchangeLock. AsyncLocal flows across await + // resumptions on different threads, so a setupAction that re-enters + // ExecuteTextCommandAsync after a ConfigureAwait(false) hop is still + // detected and surfaced as InvalidOperationException — instead of + // wedging on _textExchangeLock.WaitAsync() (the re-entrant call + // would corrupt the consumer swap mid-flight). Plain + // Environment.CurrentManagedThreadId capture wouldn't work — the + // value seen before await may not match the value seen after. + private readonly AsyncLocal _isInsideTextExchange = new(); /// /// Gets the current connection status of the device. @@ -217,9 +218,33 @@ public void Connect() /// /// Disconnects from the device. /// + /// + /// Waits up to 5 seconds to acquire _textExchangeLock before + /// tearing down the consumer / producer / transport. This prevents + /// a race where an in-flight + /// is mid-swap (text consumer running on the stream, protobuf + /// consumer not yet restarted) and Disconnect rips the transport + /// out from under it. If the wait times out, Disconnect proceeds + /// anyway — a stuck text exchange must not block teardown forever. + /// public void Disconnect() { _isDisconnecting = true; + // Best-effort coordination with ExecuteTextCommandAsync. We do + // NOT release this lock — Dispose() disposes the semaphore + // shortly after, and any in-flight text exchange will see + // _isDisconnecting on its own validation path or take the + // ObjectDisposedException catch in its Release(). + var lockAcquired = false; + try + { + lockAcquired = _textExchangeLock.Wait(TimeSpan.FromSeconds(5)); + } + catch (ObjectDisposedException) + { + // Disconnect called after Dispose — nothing to coordinate. + } + try { // Unsubscribe from message consumer events @@ -241,6 +266,16 @@ public void Disconnect() State = DeviceState.Disconnected; _isInitialized = false; _isDisconnecting = false; + if (lockAcquired) + { + try + { + _textExchangeLock.Release(); + } + catch (ObjectDisposedException) + { + } + } } } @@ -349,12 +384,14 @@ protected virtual async Task> ExecuteTextCommandAsync( cancellationToken.ThrowIfCancellationRequested(); - // Same-thread re-entrancy detection: a setupAction that calls + // Async-context re-entrancy detection: a setupAction that calls // ExecuteTextCommandAsync on the same device would corrupt the // consumer swap mid-flight. Surface as a clean exception rather // than wedging on _textExchangeLock.WaitAsync() forever. - var currentTid = Environment.CurrentManagedThreadId; - if (_textExchangeOwnerThreadId == currentTid) + // AsyncLocal flows across await thread hops so this catches + // re-entry even when the inner call resumes on a different + // thread than the outer call. + if (_isInsideTextExchange.Value) { throw new InvalidOperationException( "ExecuteTextCommandAsync is not re-entrant on the same device; " @@ -362,7 +399,7 @@ protected virtual async Task> ExecuteTextCommandAsync( } await _textExchangeLock.WaitAsync(cancellationToken).ConfigureAwait(false); - _textExchangeOwnerThreadId = currentTid; + _isInsideTextExchange.Value = true; try { // All validation runs INSIDE the lock so a competing thread @@ -518,8 +555,20 @@ protected virtual async Task> ExecuteTextCommandAsync( } finally { - _textExchangeOwnerThreadId = null; - _textExchangeLock.Release(); + _isInsideTextExchange.Value = false; + // Release can race with Dispose() — Dispose acquires the lock + // before disposing it, but if that acquisition timed out and + // Dispose proceeded anyway, our SemaphoreSlim handle is now + // gone. Treat that as a benign teardown signal rather than + // surfacing it from the finally and masking the original + // exception (if any) from the try body. + try + { + _textExchangeLock.Release(); + } + catch (ObjectDisposedException) + { + } } } From f7efc6cc8a74c7ef277a49a4484b366255966eb8 Mon Sep 17 00:00:00 2001 From: Chris Lange Date: Mon, 11 May 2026 20:21:05 -0600 Subject: [PATCH 3/6] fix: Apply Qodo /improve pass 2: handle WaitAsync ObjectDisposedException Qodo flagged the symmetric race to the post-acquisition disposal handling: _textExchangeLock.WaitAsync() itself can throw ObjectDisposedException if Dispose() ran ahead of an arriving caller. Without a catch, the low-level teardown exception leaks to callers instead of the InvalidOperationException they get from the post-acquisition _disposed check. Wrap the WaitAsync and translate to the same "device is disposed" failure mode for consistency. --- src/Daqifi.Core/Device/DaqifiDevice.cs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Daqifi.Core/Device/DaqifiDevice.cs b/src/Daqifi.Core/Device/DaqifiDevice.cs index dc4322c..32ec63e 100644 --- a/src/Daqifi.Core/Device/DaqifiDevice.cs +++ b/src/Daqifi.Core/Device/DaqifiDevice.cs @@ -398,7 +398,20 @@ protected virtual async Task> ExecuteTextCommandAsync( + "do not call it from inside a setupAction callback."); } - await _textExchangeLock.WaitAsync(cancellationToken).ConfigureAwait(false); + try + { + await _textExchangeLock.WaitAsync(cancellationToken).ConfigureAwait(false); + } + catch (ObjectDisposedException) + { + // Dispose() raced ahead of us and disposed the semaphore. + // Surface the same clean failure as the post-acquisition + // _disposed check below, instead of leaking a low-level + // teardown exception to callers. + throw new InvalidOperationException( + "ExecuteTextCommandAsync cannot run because the device is disposed."); + } + _isInsideTextExchange.Value = true; try { From 7dc53d303c637f1965a3cc9d04775ecc7f36b7d1 Mon Sep 17 00:00:00 2001 From: Chris Lange Date: Mon, 11 May 2026 20:28:52 -0600 Subject: [PATCH 4/6] fix: Apply Qodo /agentic_review pass 3 on PR #196: shorter Disconnect, ConfigureAwait, fix comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings from Qodo review on the prior pass: 1. Disconnect blocks up to 5s. Reduced the SemaphoreSlim.Wait timeout from 5 to 1 second. The 1s budget covers the longest delay any normal text exchange can hold the lock for (responseTimeoutMs default + safety margin), without making sync Disconnect calls stall callers for an unreasonable wait. Callers wanting truly non-blocking disconnect should drive it from Task.Run. Combined with this: the two Task.Delay(50) calls inside ExecuteTextCommandAsync now use ConfigureAwait(false). Without it, the awaits resume on the captured sync context — and a sync Disconnect() called on that same UI thread would deadlock waiting for a lock the exchange can't release because its continuation can't reach the finally. 2. Misleading lock release comment. The previous comment said "We do NOT release this lock", which contradicted the finally block that does release on lockAcquired. Rewrote the comment to describe the actual contract. --- src/Daqifi.Core/Device/DaqifiDevice.cs | 31 ++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/Daqifi.Core/Device/DaqifiDevice.cs b/src/Daqifi.Core/Device/DaqifiDevice.cs index 32ec63e..d6d5393 100644 --- a/src/Daqifi.Core/Device/DaqifiDevice.cs +++ b/src/Daqifi.Core/Device/DaqifiDevice.cs @@ -219,26 +219,32 @@ public void Connect() /// Disconnects from the device. /// /// - /// Waits up to 5 seconds to acquire _textExchangeLock before + /// Waits up to 1 second to acquire _textExchangeLock before /// tearing down the consumer / producer / transport. This prevents /// a race where an in-flight /// is mid-swap (text consumer running on the stream, protobuf /// consumer not yet restarted) and Disconnect rips the transport /// out from under it. If the wait times out, Disconnect proceeds /// anyway — a stuck text exchange must not block teardown forever. + /// The 1s budget is the longest delay any normal text exchange + /// can hold the lock for (responseTimeoutMs default + safety + /// margin); callers wanting non-blocking disconnect should drive + /// this off a Task.Run. /// public void Disconnect() { _isDisconnecting = true; - // Best-effort coordination with ExecuteTextCommandAsync. We do - // NOT release this lock — Dispose() disposes the semaphore - // shortly after, and any in-flight text exchange will see - // _isDisconnecting on its own validation path or take the - // ObjectDisposedException catch in its Release(). + // Best-effort coordination with ExecuteTextCommandAsync — + // acquire the lock so we don't tear the transport out from + // under an in-flight text exchange. The lock IS released in + // the finally below when acquired (so a future Connect() + // followed by ExecuteTextCommandAsync isn't blocked); a + // stuck exchange that holds past the timeout drops to the + // _isDisconnecting validation path inside the exchange. var lockAcquired = false; try { - lockAcquired = _textExchangeLock.Wait(TimeSpan.FromSeconds(5)); + lockAcquired = _textExchangeLock.Wait(TimeSpan.FromSeconds(1)); } catch (ObjectDisposedException) { @@ -484,7 +490,11 @@ protected virtual async Task> ExecuteTextCommandAsync( }; textConsumer.Start(); - await Task.Delay(50, cancellationToken); + // ConfigureAwait(false): the lock is held across this delay, + // so resuming on a captured sync context (e.g. UI thread) + // would let a sync Disconnect() call from that same thread + // deadlock waiting for the lock we hold. + await Task.Delay(50, cancellationToken).ConfigureAwait(false); Trace.WriteLine($"[ExecuteTextCommandAsync] Text consumer started at {sw.ElapsedMilliseconds}ms"); @@ -504,7 +514,10 @@ protected virtual async Task> ExecuteTextCommandAsync( while (DateTime.UtcNow - startTime < maxWait) { var previousCount = collectedLines.Count; - await Task.Delay(50, cancellationToken); + // ConfigureAwait(false): see textConsumer.Start above — + // we still hold _textExchangeLock and must not resume on + // a captured sync context. + await Task.Delay(50, cancellationToken).ConfigureAwait(false); if (collectedLines.Count > previousCount) { lastMessageTime = DateTime.UtcNow; From 2f23625bf69499440b39df76474a89e4058ab8ae Mon Sep 17 00:00:00 2001 From: Chris Lange Date: Tue, 12 May 2026 13:14:10 -0600 Subject: [PATCH 5/6] Apply Qodo /agentic_review pass 2: bump Disconnect lock wait to 10s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 1-second wait was shorter than the worst-case ExecuteTextCommandAsync hold time (StopSafely up to 1s + maxWait of responseTimeoutMs*5 = 5s by default, longer with custom timeouts), so Disconnect/Dispose could rip the transport out from under an in-flight text exchange — reintroducing the race the lock is meant to prevent. 10s budget covers the default-timeout case with margin and most custom callers; on timeout the in-flight exchange still bails cleanly via the post-acquisition _isDisconnecting check, so teardown stays bounded. --- src/Daqifi.Core/Device/DaqifiDevice.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Daqifi.Core/Device/DaqifiDevice.cs b/src/Daqifi.Core/Device/DaqifiDevice.cs index d6d5393..0cd911d 100644 --- a/src/Daqifi.Core/Device/DaqifiDevice.cs +++ b/src/Daqifi.Core/Device/DaqifiDevice.cs @@ -219,17 +219,20 @@ public void Connect() /// Disconnects from the device. /// /// - /// Waits up to 1 second to acquire _textExchangeLock before + /// Waits up to 10 seconds to acquire _textExchangeLock before /// tearing down the consumer / producer / transport. This prevents /// a race where an in-flight /// is mid-swap (text consumer running on the stream, protobuf /// consumer not yet restarted) and Disconnect rips the transport /// out from under it. If the wait times out, Disconnect proceeds /// anyway — a stuck text exchange must not block teardown forever. - /// The 1s budget is the longest delay any normal text exchange - /// can hold the lock for (responseTimeoutMs default + safety - /// margin); callers wanting non-blocking disconnect should drive - /// this off a Task.Run. + /// The 10s budget covers the worst-case ExecuteTextCommandAsync + /// hold time with default timeouts (StopSafely up to 1s + maxWait + /// of responseTimeoutMs*5 = 5s by default + safety margin) and + /// most custom-timeout callers; on timeout the in-flight exchange + /// sees _isDisconnecting == true via the post-acquisition + /// validation and bails out cleanly. Callers wanting non-blocking + /// disconnect should drive this off a Task.Run. /// public void Disconnect() { @@ -244,7 +247,7 @@ public void Disconnect() var lockAcquired = false; try { - lockAcquired = _textExchangeLock.Wait(TimeSpan.FromSeconds(1)); + lockAcquired = _textExchangeLock.Wait(TimeSpan.FromSeconds(10)); } catch (ObjectDisposedException) { From cee5f172345fc72f3dfd0175e925a6cc5b06fba4 Mon Sep 17 00:00:00 2001 From: Tyler Kron Date: Thu, 14 May 2026 09:39:20 -0600 Subject: [PATCH 6/6] style: fix indentation of inner try/finally in ExecuteTextCommandAsync The lock refactor added an outer try block but didn't re-indent the inner try/finally body, leaving braces and content misaligned by one level. Also consolidates the two identical ConfigureAwait(false) comments in the polling loop into one on the first occurrence. Co-Authored-By: Claude Sonnet 4.6 --- src/Daqifi.Core/Device/DaqifiDevice.cs | 181 ++++++++++++------------- 1 file changed, 88 insertions(+), 93 deletions(-) diff --git a/src/Daqifi.Core/Device/DaqifiDevice.cs b/src/Daqifi.Core/Device/DaqifiDevice.cs index 0cd911d..53ac332 100644 --- a/src/Daqifi.Core/Device/DaqifiDevice.cs +++ b/src/Daqifi.Core/Device/DaqifiDevice.cs @@ -453,133 +453,128 @@ protected virtual async Task> ExecuteTextCommandAsync( try { - if (stream.CanTimeout) - { - try - { - originalReadTimeout = stream.ReadTimeout; - stream.ReadTimeout = Math.Min(500, Math.Max(100, responseTimeoutMs / 4)); - } - catch + if (stream.CanTimeout) { - // Some streams may not allow setting read timeout; ignore. - originalReadTimeout = null; + try + { + originalReadTimeout = stream.ReadTimeout; + stream.ReadTimeout = Math.Min(500, Math.Max(100, responseTimeoutMs / 4)); + } + catch + { + // Some streams may not allow setting read timeout; ignore. + originalReadTimeout = null; + } } - } - // Stop the protobuf consumer so it doesn't compete for stream bytes. - // The serial transport sets ReadTimeout=500ms after connect, so the - // consumer thread's blocking Read will unblock within 500ms. - if (_messageConsumer != null) - { - _messageConsumer.MessageReceived -= OnInboundMessageReceived; - var stopped = _messageConsumer.StopSafely(timeoutMs: 1000); - if (!stopped) + // Stop the protobuf consumer so it doesn't compete for stream bytes. + // The serial transport sets ReadTimeout=500ms after connect, so the + // consumer thread's blocking Read will unblock within 500ms. + if (_messageConsumer != null) { - _messageConsumer.Stop(); + _messageConsumer.MessageReceived -= OnInboundMessageReceived; + var stopped = _messageConsumer.StopSafely(timeoutMs: 1000); + if (!stopped) + { + _messageConsumer.Stop(); + } } - } - Trace.WriteLine($"[ExecuteTextCommandAsync] Protobuf consumer stopped at {sw.ElapsedMilliseconds}ms"); + Trace.WriteLine($"[ExecuteTextCommandAsync] Protobuf consumer stopped at {sw.ElapsedMilliseconds}ms"); - // Create a temporary text consumer on the same stream - using var textConsumer = new StreamMessageConsumer( - _transport.Stream, - new LineBasedMessageParser()); + // Create a temporary text consumer on the same stream + using var textConsumer = new StreamMessageConsumer( + _transport.Stream, + new LineBasedMessageParser()); - textConsumer.MessageReceived += (_, e) => - { - collectedLines.Add(e.Message.Data); - }; + textConsumer.MessageReceived += (_, e) => + { + collectedLines.Add(e.Message.Data); + }; - textConsumer.Start(); - // ConfigureAwait(false): the lock is held across this delay, - // so resuming on a captured sync context (e.g. UI thread) - // would let a sync Disconnect() call from that same thread - // deadlock waiting for the lock we hold. - await Task.Delay(50, cancellationToken).ConfigureAwait(false); + textConsumer.Start(); + // ConfigureAwait(false): the lock is held, so resuming on a captured + // sync context (e.g. UI thread) would deadlock if that thread calls Disconnect(). + await Task.Delay(50, cancellationToken).ConfigureAwait(false); - Trace.WriteLine($"[ExecuteTextCommandAsync] Text consumer started at {sw.ElapsedMilliseconds}ms"); + Trace.WriteLine($"[ExecuteTextCommandAsync] Text consumer started at {sw.ElapsedMilliseconds}ms"); - // Execute the setup action (sends SCPI commands) - setupAction(); + // Execute the setup action (sends SCPI commands) + setupAction(); - Trace.WriteLine($"[ExecuteTextCommandAsync] Setup action completed at {sw.ElapsedMilliseconds}ms"); + Trace.WriteLine($"[ExecuteTextCommandAsync] Setup action completed at {sw.ElapsedMilliseconds}ms"); - // Wait for responses using a two-phase inactivity-based timeout: - // Phase 1: Wait up to responseTimeoutMs for the first response. - // Phase 2: After receiving data, wait completionTimeoutMs of inactivity to finish. - var lastMessageTime = DateTime.UtcNow; - var maxWait = TimeSpan.FromMilliseconds(responseTimeoutMs * 5); - var startTime = DateTime.UtcNow; - var hasReceivedAny = false; + // Wait for responses using a two-phase inactivity-based timeout: + // Phase 1: Wait up to responseTimeoutMs for the first response. + // Phase 2: After receiving data, wait completionTimeoutMs of inactivity to finish. + var lastMessageTime = DateTime.UtcNow; + var maxWait = TimeSpan.FromMilliseconds(responseTimeoutMs * 5); + var startTime = DateTime.UtcNow; + var hasReceivedAny = false; - while (DateTime.UtcNow - startTime < maxWait) - { - var previousCount = collectedLines.Count; - // ConfigureAwait(false): see textConsumer.Start above — - // we still hold _textExchangeLock and must not resume on - // a captured sync context. - await Task.Delay(50, cancellationToken).ConfigureAwait(false); - if (collectedLines.Count > previousCount) + while (DateTime.UtcNow - startTime < maxWait) { - lastMessageTime = DateTime.UtcNow; - if (!hasReceivedAny) + var previousCount = collectedLines.Count; + await Task.Delay(50, cancellationToken).ConfigureAwait(false); + if (collectedLines.Count > previousCount) { - hasReceivedAny = true; - Trace.WriteLine($"[ExecuteTextCommandAsync] First response at {sw.ElapsedMilliseconds}ms"); + lastMessageTime = DateTime.UtcNow; + if (!hasReceivedAny) + { + hasReceivedAny = true; + Trace.WriteLine($"[ExecuteTextCommandAsync] First response at {sw.ElapsedMilliseconds}ms"); + } } - } - var elapsed = DateTime.UtcNow - lastMessageTime; + var elapsed = DateTime.UtcNow - lastMessageTime; - if (hasReceivedAny) - { - // Phase 2: short completion timeout after first data - if (elapsed >= TimeSpan.FromMilliseconds(completionTimeoutMs)) + if (hasReceivedAny) { - break; + // Phase 2: short completion timeout after first data + if (elapsed >= TimeSpan.FromMilliseconds(completionTimeoutMs)) + { + break; + } } - } - else - { - // Phase 1: full initial timeout waiting for first data - if (elapsed >= TimeSpan.FromMilliseconds(responseTimeoutMs)) + else { - break; + // Phase 1: full initial timeout waiting for first data + if (elapsed >= TimeSpan.FromMilliseconds(responseTimeoutMs)) + { + break; + } } } - } - Trace.WriteLine($"[ExecuteTextCommandAsync] Collection complete at {sw.ElapsedMilliseconds}ms, {collectedLines.Count} lines"); + Trace.WriteLine($"[ExecuteTextCommandAsync] Collection complete at {sw.ElapsedMilliseconds}ms, {collectedLines.Count} lines"); - // Stop the text consumer - textConsumer.StopSafely(); - } - finally - { - if (originalReadTimeout.HasValue && stream.CanTimeout) + // Stop the text consumer + textConsumer.StopSafely(); + } + finally { - try + if (originalReadTimeout.HasValue && stream.CanTimeout) { - stream.ReadTimeout = originalReadTimeout.Value; + try + { + stream.ReadTimeout = originalReadTimeout.Value; + } + catch + { + // Ignore failures when restoring timeout. + } } - catch + + // Restart the protobuf consumer + if (_messageConsumer != null) { - // Ignore failures when restoring timeout. + _messageConsumer.Start(); + _messageConsumer.MessageReceived += OnInboundMessageReceived; } - } - // Restart the protobuf consumer - if (_messageConsumer != null) - { - _messageConsumer.Start(); - _messageConsumer.MessageReceived += OnInboundMessageReceived; + Trace.WriteLine($"[ExecuteTextCommandAsync] Total elapsed: {sw.ElapsedMilliseconds}ms"); } - Trace.WriteLine($"[ExecuteTextCommandAsync] Total elapsed: {sw.ElapsedMilliseconds}ms"); - } - return collectedLines; } finally