From b69cb8cca9c9b01178d99dff57db6022db2e9645 Mon Sep 17 00:00:00 2001 From: Reuben Bond Date: Mon, 11 May 2026 16:44:38 -0700 Subject: [PATCH 1/2] Use membership versions for stale directory cleanup Evict LocalGrainDirectory directory and cache entries for terminating silos immediately, and only evict unknown-silo entries when the address was registered before the applied membership snapshot. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../GrainDirectory/LocalGrainDirectory.cs | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs index 154d5dcc36d..920ed15ae44 100644 --- a/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs +++ b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs @@ -227,8 +227,8 @@ private void ApplyMembershipSnapshotCore(ClusterMembershipSnapshot snapshot) var addedSilos = GetMembershipDifference(targetMembership, previousMembership); ProcessSiloStatusChanges(snapshot, previousSnapshot, previousMembership); - AdjustLocalDirectory(targetMembership); - AdjustLocalCache(targetMembership); + AdjustLocalDirectory(snapshot); + AdjustLocalCache(snapshot, targetMembership); foreach (var silo in removedSilos) { @@ -365,14 +365,14 @@ private void OnSiloStatusChange(DirectoryMembership previousMembership, SiloAddr } } - private void AdjustLocalDirectory(DirectoryMembership targetMembership) + private void AdjustLocalDirectory(ClusterMembershipSnapshot snapshot) { var activationsToRemove = new List<(GrainId, ActivationId)>(); foreach (var entry in this.DirectoryPartition.GetItems()) { if (entry.Value.Activation is { } address) { - if (IsDefunctActivationSilo(targetMembership, address.SiloAddress)) + if (IsDefunctActivation(address, snapshot)) { activationsToRemove.Add((entry.Key, address.ActivationId)); } @@ -385,7 +385,7 @@ private void AdjustLocalDirectory(DirectoryMembership targetMembership) } } - private void AdjustLocalCache(DirectoryMembership targetMembership) + private void AdjustLocalCache(ClusterMembershipSnapshot snapshot, DirectoryMembership targetMembership) { foreach (var tuple in DirectoryCache.KeyValues) { @@ -398,16 +398,26 @@ private void AdjustLocalCache(DirectoryMembership targetMembership) continue; } - if (IsDefunctActivationSilo(targetMembership, activationAddress.SiloAddress)) + if (IsDefunctActivation(activationAddress, snapshot)) { DirectoryCache.Remove(activationAddress.GrainId); } } } - private static bool IsDefunctActivationSilo(DirectoryMembership targetMembership, SiloAddress? silo) + private static bool IsDefunctActivation(GrainAddress address, ClusterMembershipSnapshot snapshot) { - return silo is null || !targetMembership.MembershipCache.Contains(silo); + if (address.SiloAddress is not { } silo) + { + return true; + } + + if (snapshot.Members.TryGetValue(silo, out var member)) + { + return member.Status.IsTerminating(); + } + + return address.MembershipVersion < snapshot.Version; } internal SiloAddress? FindPredecessor(SiloAddress silo) From a8ef2487184725a0fd5f9e38869878d2e984b9cd Mon Sep 17 00:00:00 2001 From: Reuben Bond Date: Tue, 12 May 2026 09:34:38 -0700 Subject: [PATCH 2/2] Address directory cleanup review comments Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../GrainDirectoryHandoffManager.cs | 2 +- .../GrainDirectory/LocalGrainDirectory.cs | 30 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs index bac6eb68558..82911e9ab6c 100644 --- a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs +++ b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs @@ -130,7 +130,7 @@ private async Task AcceptExistingRegistrationsAsync(List singleAct for (var i = singleActivations.Count - 1; i >= 0; i--) { - if (singleActivations[i].SiloAddress is not { } siloAddress || this.siloStatusOracle.GetApproximateSiloStatus(siloAddress).IsTerminating()) + if (singleActivations[i].SiloAddress is not { } siloAddress || this.siloStatusOracle.GetApproximateSiloStatus(siloAddress) == SiloStatus.Dead) { singleActivations.RemoveAt(i); } diff --git a/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs index 920ed15ae44..4391f2316f3 100644 --- a/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs +++ b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs @@ -11,6 +11,7 @@ using Orleans.Configuration; using Orleans.GrainDirectory; using Orleans.Internal; +using Orleans.Runtime.Internal; using Orleans.Runtime.Scheduler; namespace Orleans.Runtime.GrainDirectory @@ -122,6 +123,7 @@ public void Start() LogDebugStart(); Running = true; + using var _ = new ExecutionContextSuppressor(); membershipUpdatesTask = Task.Run(() => ProcessMembershipUpdates(_membershipUpdatesCancellation.Token)); } @@ -141,14 +143,28 @@ public async Task StopAsync() //mark Running as false will exclude myself from CalculateGrainDirectoryPartition(grainId) Running = false; _membershipUpdatesCancellation.Cancel(); - if (membershipUpdatesTask is { } task) + try { - await task.SuppressThrowing(); + if (membershipUpdatesTask is { } task) + { + await task.SuppressThrowing(); + } + } + finally + { + _membershipUpdatesCancellation.Dispose(); } if (this.disposeDirectoryCache) { - await GrainDirectoryCacheFactory.DisposeGrainDirectoryCacheAsync(DirectoryCache).SuppressThrowing(); + try + { + await GrainDirectoryCacheFactory.DisposeGrainDirectoryCacheAsync(DirectoryCache); + } + catch (Exception exception) + { + LogWarningDisposeDirectoryCacheFailed(exception); + } } DirectoryPartition.Clear(); @@ -414,7 +430,7 @@ private static bool IsDefunctActivation(GrainAddress address, ClusterMembershipS if (snapshot.Members.TryGetValue(silo, out var member)) { - return member.Status.IsTerminating(); + return member.Status == SiloStatus.Dead; } return address.MembershipVersion < snapshot.Version; @@ -1027,6 +1043,12 @@ private readonly struct SiloHashLogValue(SiloAddress? silo) )] private partial void LogWarningRefreshingClusterMembershipFailed(Exception exception); + [LoggerMessage( + Level = LogLevel.Warning, + Message = "Failed to dispose the grain directory cache." + )] + private partial void LogWarningDisposeDirectoryCacheFailed(Exception exception); + [LoggerMessage( Level = LogLevel.Debug, Message = "Silo {LocalSilo} removed silo {OtherSilo}"