diff --git a/MULTINODE_TEST_ASYNC_MIGRATION.md b/MULTINODE_TEST_ASYNC_MIGRATION.md new file mode 100644 index 00000000000..7d03b8e4eaf --- /dev/null +++ b/MULTINODE_TEST_ASYNC_MIGRATION.md @@ -0,0 +1,254 @@ +# Multi-Node Test Async Migration Guide + +## Overview +This guide helps migrate multi-node tests from blocking synchronous calls to async/await patterns to prevent thread pool starvation and test timeouts in CI environments. + +## Why This Migration Is Necessary +- **Root Cause**: Blocking `.Wait()` calls on TestConductor operations cause thread pool starvation +- **Symptoms**: 20+ second timeout failures in CI environments +- **Solution**: Replace all blocking calls with proper async/await patterns + +## Migration Patterns to Look For + +### 1. TestConductor Blocking Calls +**Look for these patterns:** +```csharp +// OLD - Blocking +TestConductor.Exit(role, 0).Wait(); +TestConductor.Blackhole(node1, node2, direction).Wait(); +TestConductor.PassThrough(node1, node2, direction).Wait(); +TestConductor.Throttle(node1, node2, direction, rate).Wait(); +TestConductor.Disconnect(node1, node2).Wait(); +TestConductor.Shutdown(node, abort).Wait(); +TestConductor.RemoveNode(node).Wait(); + +// NEW - Async +await TestConductor.ExitAsync(role, 0); +await TestConductor.BlackholeAsync(node1, node2, direction); +await TestConductor.PassThroughAsync(node1, node2, direction); +await TestConductor.ThrottleAsync(node1, node2, direction, rate); +await TestConductor.DisconnectAsync(node1, node2); +await TestConductor.ShutdownAsync(node, abort); +await TestConductor.RemoveNodeAsync(node); +``` + +### 2. Barrier Synchronization +**Look for:** +```csharp +// OLD +EnterBarrier("barrier-name"); +EnterBarrier("barrier-1", "barrier-2"); + +// NEW +await EnterBarrierAsync("barrier-name"); +await EnterBarrierAsync("barrier-1", "barrier-2"); +``` + +### 3. 
RunOn with Async Operations +**Look for:** +```csharp +// OLD +RunOn(() => { + TestConductor.Exit(role, 0).Wait(); +}, roles); + +// NEW +await RunOnAsync(async () => { + await TestConductor.ExitAsync(role, 0); +}, roles); +``` + +### 4. Within Blocks +**Look for:** +```csharp +// OLD +Within(TimeSpan.FromSeconds(30), () => { + // operations + EnterBarrier("done"); +}); + +// NEW +await WithinAsync(TimeSpan.FromSeconds(30), async () => { + // operations + await EnterBarrierAsync("done"); +}); +``` + +### 5. Test Method Signatures +**Change:** +```csharp +// OLD +[MultiNodeFact] +public void TestName() + +// NEW +[MultiNodeFact] +public async Task TestName() +``` + +### 6. Helper Method Signatures +**Change:** +```csharp +// OLD +public void HelperMethod() + +// NEW +public async Task HelperMethod() +``` + +## Required Imports +Add if missing: +```csharp +using System.Threading.Tasks; +``` + +## Migration Checklist + +### ✅ Completed Tests +- [x] StressSpec +- [x] LeaderElectionSpec +- [x] ClusterAccrualFailureDetectorSpec +- [x] TestConductorSpec (in Remote.Tests.MultiNode) +- [x] RemoteNodeDeathWatchSpec (in Remote.Tests.MultiNode) + +### Core Tests - Akka.Cluster.Tests.MultiNode +- [ ] AttemptSysMsgRedeliverySpec +- [ ] ClientDowningNodeThatIsUnreachableSpec +- [ ] ClusterDeathWatchSpec +- [ ] ConvergenceSpec +- [ ] LeaderDowningAllOtherNodesSpec +- [ ] LeaderDowningNodeThatIsUnreachableSpec +- [ ] SingletonClusterSpec +- [ ] SplitBrainResolverDowningSpec +- [ ] SplitBrainSpec +- [ ] SurviveNetworkInstabilitySpec +- [ ] UnreachableNodeJoinsAgainSpec + +### Core Tests - Akka.Cluster.Tests.MultiNode/Routing +- [ ] ClusterRoundRobinSpec + +### Core Tests - Akka.Cluster.Tests.MultiNode/SBR (Split Brain Resolver) +- [ ] DownAllIndirectlyConnected5NodeSpec +- [ ] DownAllUnstable5NodeSpec +- [ ] IndirectlyConnected3NodeSpec +- [ ] IndirectlyConnected5NodeSpec +- [ ] LeaseMajority5NodeSpec + +### Core Tests - Akka.Remote.Tests.MultiNode +- [ ] RemoteNodeRestartGateSpec 
+- [ ] RemoteNodeShutdownAndComesBackSpec +- [ ] RemoteReDeploymentSpec +- [ ] RemoteRestartedQuarantinedSpec + +### Contrib Tests - Akka.Cluster.Sharding.Tests.MultiNode +- [ ] ClusterShardCoordinatorDowning2Spec +- [ ] ClusterShardCoordinatorDowningSpec +- [ ] ClusterShardingFailureSpec +- [ ] ClusterShardingRememberEntitiesNewExtractorSpec +- [ ] ClusterShardingRememberEntitiesSpec +- [ ] ClusterShardingSingleShardPerEntitySpec +- [ ] ClusterShardingSpec + +### Contrib Tests - Akka.Cluster.Tools.Tests.MultiNode +- [ ] ClusterClient/ClusterClientDiscoverySpec +- [ ] ClusterClient/ClusterClientSpec +- [ ] PublishSubscribe/DistributedPubSubMediatorSpec +- [x] PublishSubscribe/DistributedPubSubRestartSpec +- [ ] Singleton/ClusterSingletonManagerDownedSpec +- [ ] Singleton/ClusterSingletonManagerSpec + +### Tests That May Need EnterBarrier -> EnterBarrierAsync Migration +Tests that call `EnterBarrier` but contain no blocking TestConductor calls should still be converted to `EnterBarrierAsync` for consistency. Run this command to list the files that still use it: +```bash +find src -name "*.cs" -path "*Tests.MultiNode*" -exec grep -l "EnterBarrier(" {} \; +``` + +## Migration Steps + +1. **Add the `System.Threading.Tasks` using directive (if missing)** + ```csharp + using System.Threading.Tasks; + ``` + +2. **Convert test method signature** + - Change `public void` to `public async Task` + +3. **Find and replace blocking patterns** + - Search for `.Wait()` calls + - Search for `EnterBarrier(` + - Search for `Within(` + - Search for `RunOn(` with async operations inside + +4. **Update method calls** + - Add `await` keyword before async calls + - Change method names to async versions (add `Async` suffix) + - Update lambdas to `async` when needed + +5. **Update helper methods** + - Convert any helper methods that now contain async calls + - Propagate async/await up the call chain + +6. **Build and verify** + ```bash + dotnet build src/core/Akka.Cluster.Tests.MultiNode/Akka.Cluster.Tests.MultiNode.csproj -c Release + ``` + +7. 
**Run tests (example)** + ```bash + dotnet test src/core/Akka.Cluster.Tests.MultiNode/Akka.Cluster.Tests.MultiNode.csproj \ + -c Release --filter "FullyQualifiedName~YourTestName" --framework net8.0 + ``` + +## Common Pitfalls to Avoid + +1. **Don't use ConfigureAwait(false) in tests** + - Tests should maintain their synchronization context + +2. **Don't use GetAwaiter().GetResult()** + - This is just as bad as .Wait() for blocking + +3. **Ensure all async operations are awaited** + - Missing awaits can cause race conditions + +4. **Watch for nested RunOn calls** + - Inner RunOn may need to become RunOnAsync if it contains async operations + +5. **Don't forget lambda async modifiers** + ```csharp + // Wrong + ReportResult(() => { await SomeAsync(); }); + + // Right + ReportResult(async () => { await SomeAsync(); }); + ``` + +## Verification Commands + +Check for remaining blocking calls: +```bash +# Find .Wait() calls +grep -r "\.Wait()" src --include="*.cs" | grep -i multinode + +# Find EnterBarrier calls +grep -r "EnterBarrier(" src --include="*.cs" | grep -i multinode + +# Find TestConductor blocking calls +grep -r "TestConductor\.[A-Z].*\.Wait()" src --include="*.cs" +``` + +## Git Commit Message Template +``` +Convert [TestName] to async + +- Convert main test method to async Task +- Replace TestConductor.[Method]().Wait() with await TestConductor.[Method]Async() +- Replace EnterBarrier with EnterBarrierAsync +- Use RunOnAsync for async operations +- Use WithinAsync for async timing constraints +- Add using System.Threading.Tasks +``` + +## Notes +- This migration improves test reliability by preventing thread pool starvation +- Tests should run faster and more reliably in CI environments +- The async APIs provide better cancellation support via CancellationToken \ No newline at end of file diff --git a/check-multinode-migration.sh b/check-multinode-migration.sh new file mode 100755 index 00000000000..f9b9866982e --- /dev/null +++ 
b/check-multinode-migration.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +# Multi-Node Test Async Migration Status Checker +# This script helps identify which multi-node tests still need async migration + +echo "=========================================" +echo "Multi-Node Test Async Migration Status" +echo "=========================================" +echo "" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Function to check a directory +check_directory() { + local dir=$1 + local name=$2 + + echo -e "${YELLOW}Checking $name:${NC}" + echo "----------------------------------------" + + # Find files with blocking TestConductor calls + local blocking_files=$(find "$dir" -name "*.cs" -exec grep -l "TestConductor.*\.Wait()" {} \; 2>/dev/null | sort) + + if [ -z "$blocking_files" ]; then + echo -e "${GREEN}✓ No TestConductor blocking calls found${NC}" + else + echo -e "${RED}✗ Files with TestConductor.*.Wait() calls:${NC}" + for file in $blocking_files; do + basename_file=$(basename "$file") + count=$(grep -c "\.Wait()" "$file") + echo " - $basename_file ($count .Wait() calls)" + done + fi + + # Check for EnterBarrier (non-async) + local barrier_count=$(find "$dir" -name "*.cs" -exec grep -l "EnterBarrier(" {} \; 2>/dev/null | wc -l) + if [ "$barrier_count" -gt 0 ]; then + echo -e "${YELLOW}⚠ $barrier_count files still use EnterBarrier (should be EnterBarrierAsync)${NC}" + fi + + # Check for Within (non-async) + local within_count=$(find "$dir" -name "*.cs" -exec grep -l "Within(" {} \; 2>/dev/null | wc -l) + if [ "$within_count" -gt 0 ]; then + echo -e "${YELLOW}⚠ $within_count files use Within (may need WithinAsync)${NC}" + fi + + echo "" +} + +# Check core tests +check_directory "src/core/Akka.Cluster.Tests.MultiNode" "Akka.Cluster.Tests.MultiNode" +check_directory "src/core/Akka.Remote.Tests.MultiNode" "Akka.Remote.Tests.MultiNode" + +# Check contrib tests +if [ -d 
"src/contrib/cluster/Akka.Cluster.Sharding.Tests.MultiNode" ]; then + check_directory "src/contrib/cluster/Akka.Cluster.Sharding.Tests.MultiNode" "Akka.Cluster.Sharding.Tests.MultiNode" +fi + +if [ -d "src/contrib/cluster/Akka.Cluster.Tools.Tests.MultiNode" ]; then + check_directory "src/contrib/cluster/Akka.Cluster.Tools.Tests.MultiNode" "Akka.Cluster.Tools.Tests.MultiNode" +fi + +if [ -d "src/contrib/cluster/Akka.Cluster.Metrics.Tests.MultiNode" ]; then + check_directory "src/contrib/cluster/Akka.Cluster.Metrics.Tests.MultiNode" "Akka.Cluster.Metrics.Tests.MultiNode" +fi + +if [ -d "src/contrib/cluster/Akka.DistributedData.Tests.MultiNode" ]; then + check_directory "src/contrib/cluster/Akka.DistributedData.Tests.MultiNode" "Akka.DistributedData.Tests.MultiNode" +fi + +echo "=========================================" +echo "Summary" +echo "=========================================" + +# Count total blocking files +total_blocking=$(find src -name "*.cs" -path "*Tests.MultiNode*" -exec grep -l "TestConductor.*\.Wait()" {} \; 2>/dev/null | wc -l) +total_files=$(find src -name "*.cs" -path "*Tests.MultiNode*" 2>/dev/null | wc -l) + +echo "Total multi-node test files: $total_files" +echo -e "${RED}Files with blocking TestConductor calls: $total_blocking${NC}" + +if [ "$total_blocking" -eq 0 ]; then + echo -e "${GREEN}🎉 All TestConductor blocking calls have been migrated!${NC}" +else + echo -e "${YELLOW}⚠ Migration still needed for $total_blocking files${NC}" +fi + +echo "" +echo "Run this script periodically to track migration progress." +echo "See MULTINODE_TEST_ASYNC_MIGRATION.md for migration guide." 
\ No newline at end of file diff --git a/src/contrib/cluster/Akka.Cluster.Tools.Tests.MultiNode/PublishSubscribe/DistributedPubSubRestartSpec.cs b/src/contrib/cluster/Akka.Cluster.Tools.Tests.MultiNode/PublishSubscribe/DistributedPubSubRestartSpec.cs index 0a6868227c3..fd755c0fa9b 100644 --- a/src/contrib/cluster/Akka.Cluster.Tools.Tests.MultiNode/PublishSubscribe/DistributedPubSubRestartSpec.cs +++ b/src/contrib/cluster/Akka.Cluster.Tools.Tests.MultiNode/PublishSubscribe/DistributedPubSubRestartSpec.cs @@ -15,22 +15,23 @@ using Akka.Remote.TestKit; using FluentAssertions; using FluentAssertions.Extensions; +using System.Threading.Tasks; -namespace Akka.Cluster.Tools.Tests.MultiNode.PublishSubscribe +namespace Akka.Cluster.Tools.Tests.MultiNode.PublishSubscribe; + +public class DistributedPubSubRestartSpecConfig : MultiNodeConfig { - public class DistributedPubSubRestartSpecConfig : MultiNodeConfig - { - public RoleName First { get; } - public RoleName Second { get; } - public RoleName Third { get; } + public RoleName First { get; } + public RoleName Second { get; } + public RoleName Third { get; } - public DistributedPubSubRestartSpecConfig() - { - First = Role("first"); - Second = Role("second"); - Third = Role("third"); + public DistributedPubSubRestartSpecConfig() + { + First = Role("first"); + Second = Role("second"); + Third = Role("third"); - CommonConfig = ConfigurationFactory.ParseString(@" + CommonConfig = ConfigurationFactory.ParseString(@" akka.loglevel = INFO akka.actor.provider = ""Akka.Cluster.ClusterActorRefProvider, Akka.Cluster"" akka.cluster.pub-sub.gossip-interval = 500ms @@ -38,177 +39,176 @@ public DistributedPubSubRestartSpecConfig() akka.cluster.auto-down-unreachable-after = off ").WithFallback(DistributedPubSub.DefaultConfig()); - TestTransport = true; - } + TestTransport = true; + } - internal class Shutdown : ReceiveActor + internal class Shutdown : ReceiveActor + { + public Shutdown() { - public Shutdown() + 
Context.GetLogger().Info("Shutdown actor started on {0}", Context.System.Name); + Receive(str => str.Equals("shutdown"), _ => { - Context.GetLogger().Info("Shutdown actor started on {0}", Context.System.Name); - Receive(str => str.Equals("shutdown"), _ => - { - Context.System.Terminate(); - }); - } + Context.System.Terminate(); + }); } } +} + +public class DistributedPubSubRestartSpec : MultiNodeClusterSpec +{ + private readonly DistributedPubSubRestartSpecConfig _config; - public class DistributedPubSubRestartSpec : MultiNodeClusterSpec + public DistributedPubSubRestartSpec() : this(new DistributedPubSubRestartSpecConfig()) { - private readonly DistributedPubSubRestartSpecConfig _config; + } - public DistributedPubSubRestartSpec() : this(new DistributedPubSubRestartSpecConfig()) - { - } + protected DistributedPubSubRestartSpec(DistributedPubSubRestartSpecConfig config) : base(config, typeof(DistributedPubSubRestartSpec)) + { + _config = config; + } - protected DistributedPubSubRestartSpec(DistributedPubSubRestartSpecConfig config) : base(config, typeof(DistributedPubSubRestartSpec)) - { - _config = config; - } + [MultiNodeFact] + public async Task DistributedPubSubRestartSpecs() + { + await A_Cluster_with_DistributedPubSub_must_startup_3_node_cluster(); + await A_Cluster_with_DistributedPubSub_must_handle_restart_of_nodes_with_same_address(); + } - [MultiNodeFact] - public void DistributedPubSubRestartSpecs() + public async Task A_Cluster_with_DistributedPubSub_must_startup_3_node_cluster() + { + await WithinAsync(15.Seconds(), async () => { - A_Cluster_with_DistributedPubSub_must_startup_3_node_cluster(); - A_Cluster_with_DistributedPubSub_must_handle_restart_of_nodes_with_same_address(); - } + await JoinAsync(_config.First, _config.First); + await JoinAsync(_config.Second, _config.First); + await JoinAsync(_config.Third, _config.First); + await EnterBarrierAsync("after-1"); + }); + } - public void A_Cluster_with_DistributedPubSub_must_startup_3_node_cluster() + 
public async Task A_Cluster_with_DistributedPubSub_must_handle_restart_of_nodes_with_same_address() + { + await WithinAsync(30.Seconds(), async () => { - Within(15.Seconds(), () => - { - Join(_config.First, _config.First); - Join(_config.Second, _config.First); - Join(_config.Third, _config.First); - EnterBarrier("after-1"); - }); - } + Mediator.Tell(new Subscribe("topic1", TestActor)); + ExpectMsg(); + await CountAsync(3); - public void A_Cluster_with_DistributedPubSub_must_handle_restart_of_nodes_with_same_address() - { - Within(30.Seconds(), () => + RunOn(() => { - Mediator.Tell(new Subscribe("topic1", TestActor)); - ExpectMsg(); - AwaitCount(3); - - RunOn(() => - { - Mediator.Tell(new Publish("topic1", "msg1")); - }, _config.First); - EnterBarrier("pub-msg1"); + Mediator.Tell(new Publish("topic1", "msg1")); + }, _config.First); + await EnterBarrierAsync("pub-msg1"); - ExpectMsg("msg1"); - EnterBarrier("got-msg1"); + await ExpectMsgAsync("msg1"); + await EnterBarrierAsync("got-msg1"); - RunOn(() => - { - Mediator.Tell(DeltaCount.Instance); - var oldDeltaCount = ExpectMsg(); + await RunOnAsync(async () => + { + Mediator.Tell(DeltaCount.Instance); + var oldDeltaCount = await ExpectMsgAsync(); - EnterBarrier("end"); + await EnterBarrierAsync("end"); - Mediator.Tell(DeltaCount.Instance); - var deltaCount = ExpectMsg(); - deltaCount.Should().Be(oldDeltaCount); - }, _config.Second); + Mediator.Tell(DeltaCount.Instance); + var deltaCount = await ExpectMsgAsync(); + deltaCount.Should().Be(oldDeltaCount); + }, _config.Second); - RunOn(() => - { - Mediator.Tell(DeltaCount.Instance); - var oldDeltaCount = ExpectMsg(); + await RunOnAsync(async () => + { + Mediator.Tell(DeltaCount.Instance); + var oldDeltaCount = await ExpectMsgAsync(); - var thirdAddress = Node(_config.Third).Address; - TestConductor.Shutdown(_config.Third).Wait(); + var thirdAddress = (await NodeAsync(_config.Third)).Address; + await TestConductor.Shutdown(_config.Third).WaitAsync(30.Seconds()); - 
Within(20.Seconds(), () => + await WithinAsync(20.Seconds(), async () => + { + await AwaitAssertAsync(async () => { - AwaitAssert(() => - { - Sys.ActorSelection(new RootActorPath(thirdAddress) / "user" / "shutdown").Tell(new Identify(null)); - ExpectMsg(1.Seconds()).Subject.Should().NotBeNull(); - }); + Sys.ActorSelection(new RootActorPath(thirdAddress) / "user" / "shutdown").Tell(new Identify(null)); + (await ExpectMsgAsync(1.Seconds())).Subject.Should().NotBeNull(); }); + }); - Sys.ActorSelection(new RootActorPath(thirdAddress) / "user" / "shutdown").Tell("shutdown"); + Sys.ActorSelection(new RootActorPath(thirdAddress) / "user" / "shutdown").Tell("shutdown"); - EnterBarrier("end"); + await EnterBarrierAsync("end"); - Mediator.Tell(DeltaCount.Instance); - var deltaCount = ExpectMsg(); - deltaCount.Should().Be(oldDeltaCount); - }, _config.First); + Mediator.Tell(DeltaCount.Instance); + var deltaCount = await ExpectMsgAsync(); + deltaCount.Should().Be(oldDeltaCount); + }, _config.First); - RunOn(() => + await RunOnAsync(async () => + { + var node3Address = Cluster.Get(Sys).SelfAddress; + await Sys.WhenTerminated.WaitAsync(30.Seconds()); + var newSystem = ActorSystem.Create( + Sys.Name, + ConfigurationFactory + .ParseString($"akka.remote.dot-netty.tcp.port={node3Address.Port}") + .WithFallback(Sys.Settings.Config)); + + try { - var node3Address = Cluster.Get(Sys).SelfAddress; - Sys.WhenTerminated.Wait(10.Seconds()); - var newSystem = ActorSystem.Create( - Sys.Name, - ConfigurationFactory - .ParseString($"akka.remote.dot-netty.tcp.port={node3Address.Port}") - .WithFallback(Sys.Settings.Config)); - - try - { - // don't join the old cluster - Cluster.Get(newSystem).Join(Cluster.Get(newSystem).SelfAddress); - var newMediator = DistributedPubSub.Get(newSystem).Mediator; - var probe = CreateTestProbe(newSystem); - newMediator.Tell(new Subscribe("topic2", probe.Ref), probe.Ref); - probe.ExpectMsg(); - - // let them gossip, but Delta should not be exchanged - 
probe.ExpectNoMsg(5.Seconds()); - newMediator.Tell(DeltaCount.Instance, probe.Ref); - probe.ExpectMsg(0L); - - newSystem.Log.Info("Shutdown actor started on {0}",node3Address); - newSystem.ActorOf("shutdown"); - newSystem.WhenTerminated.Wait(10.Seconds()); - } - finally - { - newSystem.Terminate(); - } - }, _config.Third); - }); - } + // don't join the old cluster + await Cluster.Get(newSystem).JoinAsync(Cluster.Get(newSystem).SelfAddress); + var newMediator = DistributedPubSub.Get(newSystem).Mediator; + var probe = CreateTestProbe(newSystem); + newMediator.Tell(new Subscribe("topic2", probe.Ref), probe.Ref); + await probe.ExpectMsgAsync(); + + // let them gossip, but Delta should not be exchanged + await probe.ExpectNoMsgAsync(5.Seconds()); + newMediator.Tell(DeltaCount.Instance, probe.Ref); + await probe.ExpectMsgAsync(0L); + + newSystem.Log.Info("Shutdown actor started on {0}",node3Address); + newSystem.ActorOf("shutdown"); + await newSystem.WhenTerminated.WaitAsync(30.Seconds()); + } + finally + { + await newSystem.Terminate().WaitAsync(30.Seconds()); + } + }, _config.Third); + }); + } - protected override int InitialParticipantsValueFactory => Roles.Count; + protected override int InitialParticipantsValueFactory => Roles.Count; - private IActorRef CreateMediator() - { - return DistributedPubSub.Get(Sys).Mediator; - } + private IActorRef CreateMediator() + { + return DistributedPubSub.Get(Sys).Mediator; + } - private IActorRef Mediator + private IActorRef Mediator + { + get { - get - { - return DistributedPubSub.Get(Sys).Mediator; - } + return DistributedPubSub.Get(Sys).Mediator; } + } - private void Join(RoleName from, RoleName to) + private async Task JoinAsync(RoleName from, RoleName to) + { + RunOn(() => { - RunOn(() => - { - Cluster.Get(Sys).Join(Node(to).Address); - CreateMediator(); - }, from); - EnterBarrier(from.Name + "-joined"); - } + Cluster.Get(Sys).Join(Node(to).Address); + CreateMediator(); + }, from); + await EnterBarrierAsync(from.Name + 
"-joined"); + } - private void AwaitCount(int expected) + private async Task CountAsync(int expected) + { + var probe = CreateTestProbe(); + await AwaitAssertAsync(async () => { - var probe = CreateTestProbe(); - AwaitAssert(() => - { - Mediator.Tell(Count.Instance, probe.Ref); - probe.ExpectMsg().Should().Be(expected); - }); - } + Mediator.Tell(Count.Instance, probe.Ref); + (await probe.ExpectMsgAsync()).Should().Be(expected); + }); } -} +} \ No newline at end of file diff --git a/src/core/Akka.Cluster.TestKit/MultiNodeClusterSpec.cs b/src/core/Akka.Cluster.TestKit/MultiNodeClusterSpec.cs index cf01e4aea72..86f3f54e116 100644 --- a/src/core/Akka.Cluster.TestKit/MultiNodeClusterSpec.cs +++ b/src/core/Akka.Cluster.TestKit/MultiNodeClusterSpec.cs @@ -11,6 +11,8 @@ using System.Collections.Immutable; using System.Linq; using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; using Akka.Actor; using Akka.Actor.Setup; using Akka.Cluster.Tests.MultiNode; @@ -258,6 +260,20 @@ public void StartClusterNode() } } + /// + /// Use this method for the initial startup of the cluster node + /// + public async Task StartClusterNodeAsync(CancellationToken cancellationToken = default) + { + if (ClusterView.Members.IsEmpty) + { + // !!! NOTE: Do not convert this to JoinAsync() !!! + // ReSharper disable once MethodHasAsyncOverloadWithCancellation + Cluster.Join(GetAddress(Myself)); + await AwaitAssertAsync(() => Assert.Contains(GetAddress(Myself), ClusterView.Members.Select(m => m.Address)), cancellationToken: cancellationToken); + } + } + /// /// Initialize the cluster of the specified member nodes () /// and wait until all joined and . @@ -279,6 +295,28 @@ public void AwaitClusterUp(params RoleName[] roles) EnterBarrier(roles.Select(r => r.Name).Aggregate((a, b) => a + "-" + b) + "-joined"); } + /// + /// Initialize the cluster of the specified member nodes () + /// and wait until all joined and . 
+ /// + /// First node will be started first and others will join the first. + /// + public async Task AwaitClusterUpAsync(CancellationToken cancellationToken, params RoleName[] roles) + { + // make sure that the node-to-join is started before other join + await RunOnAsync(async () => await StartClusterNodeAsync(cancellationToken), roles.First()); + + await EnterBarrierAsync(cancellationToken, roles.First().Name + "-started"); + if (roles.Skip(1).Contains(Myself)) + await Cluster.JoinAsync(GetAddress(roles.First()), cancellationToken); + + if (roles.Contains(Myself)) + { + await AwaitMembersUpAsync(roles.Length, cancellationToken: cancellationToken); + } + await EnterBarrierAsync(cancellationToken, roles.Select(r => r.Name).Aggregate((a, b) => a + "-" + b) + "-joined"); + } + public void JoinWithin(RoleName joinNode, TimeSpan? max = null, TimeSpan? interval = null) { if (max == null) max = RemainingOrDefault; @@ -380,11 +418,48 @@ public void AwaitMembersUp( }); } + public async Task AwaitMembersUpAsync( + int numbersOfMembers, + ImmutableHashSet
canNotBePartOfMemberRing = null, + TimeSpan? timeout = null, + CancellationToken cancellationToken = default) + { + canNotBePartOfMemberRing ??= ImmutableHashSet.Create
(); + timeout ??= TimeSpan.FromSeconds(25); + + await WithinAsync(timeout.Value, async () => + { + if (canNotBePartOfMemberRing.Any()) // don't run this on an empty set + await AwaitAssertAsync(() => + { + foreach (var a in canNotBePartOfMemberRing) + _assertions.AssertFalse(ClusterView.Members.Select(m => m.Address).Contains(a)); + }, cancellationToken: cancellationToken); + await AwaitAssertAsync( + () => _assertions.AssertEqual(numbersOfMembers, ClusterView.Members.Count), + cancellationToken: cancellationToken); + await AwaitAssertAsync( + () => _assertions.AssertTrue(ClusterView.Members.All(m => m.Status == MemberStatus.Up), "All members should be up"), + cancellationToken: cancellationToken); + // clusterView.leader is updated by LeaderChanged, await that to be updated also + var firstMember = ClusterView.Members.FirstOrDefault(); + var expectedLeader = firstMember?.Address; + await AwaitAssertAsync( + () => _assertions.AssertEqual(expectedLeader, ClusterView.Leader), + cancellationToken: cancellationToken); + }, cancellationToken: cancellationToken); + } + public void AwaitAllReachable() { AwaitAssert(() => _assertions.AssertFalse(ClusterView.UnreachableMembers.Any())); } + public async Task AwaitAllReachableAsync() + { + await AwaitAssertAsync(() => _assertions.AssertFalse(ClusterView.UnreachableMembers.Any())); + } + public void AwaitSeenSameState(params Address[] addresses) { AwaitAssert(() => _assertions.AssertFalse(addresses.ToImmutableHashSet().Except(ClusterView.SeenBy).Any())); diff --git a/src/core/Akka.Cluster.Tests.MultiNode/ClusterAccrualFailureDetectorSpec.cs b/src/core/Akka.Cluster.Tests.MultiNode/ClusterAccrualFailureDetectorSpec.cs index 9e1a8c8853b..25130063ee1 100644 --- a/src/core/Akka.Cluster.Tests.MultiNode/ClusterAccrualFailureDetectorSpec.cs +++ b/src/core/Akka.Cluster.Tests.MultiNode/ClusterAccrualFailureDetectorSpec.cs @@ -7,6 +7,7 @@ using System; using System.Threading; +using System.Threading.Tasks; using Akka.Cluster.TestKit; 
using Akka.Configuration; using Akka.Remote.TestKit; @@ -15,142 +16,135 @@ using Akka.Cluster.Tests.MultiNode; using Akka.MultiNode.TestAdapter; -namespace Akka.Cluster.Tests.MultiNode +namespace Akka.Cluster.Tests.MultiNode; + +public class ClusterAccrualFailureDetectorMultiSpec : MultiNodeConfig { + public RoleName First { get; } + + public RoleName Second { get; } + + public RoleName Third { get; } + + public ClusterAccrualFailureDetectorMultiSpec() + { + First = Role("first"); + Second = Role("second"); + Third = Role("third"); + + CommonConfig= DebugConfig(false) + .WithFallback(ConfigurationFactory.ParseString("akka.cluster.failure-detector.threshold = 4")) + .WithFallback(MultiNodeClusterSpec.ClusterConfig()); + + TestTransport = true; + } +} + +public class ClusterAccrualFailureDetectorSpec : MultiNodeClusterSpec { - public class ClusterAccrualFailureDetectorMultiSpec : MultiNodeConfig { - public RoleName First { get; private set; } + private readonly ClusterAccrualFailureDetectorMultiSpec _config; - public RoleName Second { get; private set; } + public ClusterAccrualFailureDetectorSpec() + : this(new ClusterAccrualFailureDetectorMultiSpec()) + { + MuteMarkingAsUnreachable(); + } - public RoleName Third { get; private set; } + protected ClusterAccrualFailureDetectorSpec(ClusterAccrualFailureDetectorMultiSpec config) + : base(config, typeof(ClusterAccrualFailureDetectorSpec)) + { + _config = config; + } - public ClusterAccrualFailureDetectorMultiSpec() - { - First = Role("first"); - Second = Role("second"); - Third = Role("third"); + [MultiNodeFact] + public async Task ClusterAccrualFailureDetectorSpecs() + { + await A_heartbeat_driven_Failure_Detector_receive_heartbeats_so_that_all_member_nodes_in_the_cluster_are_marked_available(); + await A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_when_network_partition_and_then_back_to_available_when_partition_is_healed(); + await 
A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_if_a_node_in_the_cluster_is_shut_down_and_its_heartbeats_stops(); + } - CommonConfig= DebugConfig(false) - .WithFallback(ConfigurationFactory.ParseString("akka.cluster.failure-detector.threshold = 4")) - .WithFallback(MultiNodeClusterSpec.ClusterConfig()); + public async Task A_heartbeat_driven_Failure_Detector_receive_heartbeats_so_that_all_member_nodes_in_the_cluster_are_marked_available() + { + await AwaitClusterUpAsync(CancellationToken.None, _config.First, _config.Second, _config.Third); + + await Task.Yield(); + + Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)).ShouldBeTrue(); + Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)).ShouldBeTrue(); + Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)).ShouldBeTrue(); - TestTransport = true; - } + await EnterBarrierAsync("after-1"); } - public class ClusterAccrualFailureDetectorSpec : MultiNodeClusterSpec + public async Task A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_when_network_partition_and_then_back_to_available_when_partition_is_healed() { - private readonly ClusterAccrualFailureDetectorMultiSpec _config; + await RunOnAsync(async () => { + await TestConductor.BlackholeAsync(_config.First, _config.Second, ThrottleTransportAdapter.Direction.Both); + }, _config.First); - public ClusterAccrualFailureDetectorSpec() - : this(new ClusterAccrualFailureDetectorMultiSpec()) - { - MuteMarkingAsUnreachable(); - } + await EnterBarrierAsync("broken"); - protected ClusterAccrualFailureDetectorSpec(ClusterAccrualFailureDetectorMultiSpec config) - : base(config, typeof(ClusterAccrualFailureDetectorSpec)) + RunOn(() => { - _config = config; - } + // detect failure... 
+ AwaitCondition(() => !Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)), + TimeSpan.FromSeconds(15)); + // other connections still ok + Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)).ShouldBeTrue(); + }, _config.First); - [MultiNodeFact] - public void ClusterAccrualFailureDetectorSpecs() - { - A_heartbeat_driven_Failure_Detector_receive_heartbeats_so_that_all_member_nodes_in_the_cluster_are_marked_available - (); - A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_when_network_partition_and_then_back_to_available_when_partition_is_healed - (); - A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_if_a_node_in_the_cluster_is_shut_down_and_its_heartbeats_stops - (); - } - - public void - A_heartbeat_driven_Failure_Detector_receive_heartbeats_so_that_all_member_nodes_in_the_cluster_are_marked_available - () + RunOn(() => { - AwaitClusterUp(_config.First, _config.Second, _config.Third); - - Thread.Sleep(5); - Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)).ShouldBeTrue(); - Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)).ShouldBeTrue(); + // detect failure... 
+ AwaitCondition(() => !Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)), + TimeSpan.FromSeconds(15)); + // other connections still ok Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)).ShouldBeTrue(); + }, _config.Second); + + + await EnterBarrierAsync("partitioned"); + + await RunOnAsync(async () => { + await TestConductor.PassThroughAsync(_config.First, _config.Second, ThrottleTransportAdapter.Direction.Both); + }, _config.First); + + await EnterBarrierAsync("repaired"); - EnterBarrier("after-1"); - } + RunOn(() => + { + AwaitCondition(() => Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)), + TimeSpan.FromSeconds(15)); + }, _config.First, _config.Third); - public void - A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_when_network_partition_and_then_back_to_available_when_partition_is_healed - () + RunOn(() => { - RunOn(() => { - TestConductor.Blackhole(_config.First, _config.Second, ThrottleTransportAdapter.Direction.Both).Wait(); - }, _config.First); - - EnterBarrier("broken"); - - RunOn(() => - { - // detect failure... - AwaitCondition(() => !Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)), - TimeSpan.FromSeconds(15)); - // other connections still ok - Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)).ShouldBeTrue(); - }, _config.First); - - RunOn(() => - { - // detect failure... 
- AwaitCondition(() => !Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)), - TimeSpan.FromSeconds(15)); - // other connections still ok - Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)).ShouldBeTrue(); - }, _config.Second); - - - EnterBarrier("partitioned"); - - RunOn(() => { - TestConductor.PassThrough(_config.First, _config.Second, ThrottleTransportAdapter.Direction.Both).Wait(); - }, _config.First); - - EnterBarrier("repaired"); - - RunOn(() => - { - AwaitCondition(() => Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)), - TimeSpan.FromSeconds(15)); - }, _config.First, _config.Third); - - RunOn(() => - { - AwaitCondition(() => Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)), - TimeSpan.FromSeconds(15)); - }, _config.Second); - - EnterBarrier("after-2"); - } + AwaitCondition(() => Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)), + TimeSpan.FromSeconds(15)); + }, _config.Second); + + await EnterBarrierAsync("after-2"); + } - public void - A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_if_a_node_in_the_cluster_is_shut_down_and_its_heartbeats_stops - () + public async Task + A_heartbeat_driven_Failure_Detector_mark_node_as_unavailable_if_a_node_in_the_cluster_is_shut_down_and_its_heartbeats_stops + () + { + await RunOnAsync(async () => { + await TestConductor.ExitAsync(_config.Third, 0); + }, _config.First); + + await EnterBarrierAsync("third-shutdown"); + + RunOn(() => { - RunOn(() => { - TestConductor.Exit(_config.Third, 0).Wait(); - }, _config.First); - - EnterBarrier("third-shutdown"); - - RunOn(() => - { - // remaining nodes should detect failure... 
- AwaitCondition(() => !Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)), TimeSpan.FromSeconds(15)); - // other connections still ok - Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)).ShouldBeTrue(); - Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)).ShouldBeTrue(); - }, _config.First, _config.Second); - - EnterBarrier("after-3"); - } + // remaining nodes should detect failure... + AwaitCondition(() => !Cluster.FailureDetector.IsAvailable(GetAddress(_config.Third)), TimeSpan.FromSeconds(15)); + // other connections still ok + Cluster.FailureDetector.IsAvailable(GetAddress(_config.First)).ShouldBeTrue(); + Cluster.FailureDetector.IsAvailable(GetAddress(_config.Second)).ShouldBeTrue(); + }, _config.First, _config.Second); + + await EnterBarrierAsync("after-3"); } -} +} \ No newline at end of file diff --git a/src/core/Akka.Cluster.Tests.MultiNode/LeaderElectionSpec.cs b/src/core/Akka.Cluster.Tests.MultiNode/LeaderElectionSpec.cs index 7e19f47c2df..77fea1782ba 100644 --- a/src/core/Akka.Cluster.Tests.MultiNode/LeaderElectionSpec.cs +++ b/src/core/Akka.Cluster.Tests.MultiNode/LeaderElectionSpec.cs @@ -8,167 +8,168 @@ using System; using System.Collections.Immutable; using System.Linq; +using System.Threading; +using System.Threading.Tasks; using Akka.Cluster.TestKit; using Akka.MultiNode.TestAdapter; using Akka.Remote.TestKit; using Akka.TestKit; -namespace Akka.Cluster.Tests.MultiNode +namespace Akka.Cluster.Tests.MultiNode; + +public class LeaderElectionSpecConfig : MultiNodeConfig { - public class LeaderElectionSpecConfig : MultiNodeConfig - { - public RoleName Controller { get; private set; } - public RoleName First { get; private set; } - public RoleName Second { get; private set; } - public RoleName Third { get; private set; } - public RoleName Forth { get; private set; } + public RoleName Controller { get; } + public RoleName First { get; } + public RoleName Second { get; } + public RoleName Third { get; } + public 
RoleName Forth { get; } - public LeaderElectionSpecConfig(bool failureDetectorPuppet) - { - Controller = Role("controller"); - First = Role("first"); - Second = Role("second"); - Third = Role("third"); - Forth = Role("forth"); - - CommonConfig = DebugConfig(false) - .WithFallback(MultiNodeClusterSpec.ClusterConfig(failureDetectorPuppet)); - } + public LeaderElectionSpecConfig(bool failureDetectorPuppet) + { + Controller = Role("controller"); + First = Role("first"); + Second = Role("second"); + Third = Role("third"); + Forth = Role("forth"); + + CommonConfig = DebugConfig(false) + .WithFallback(MultiNodeClusterSpec.ClusterConfig(failureDetectorPuppet)); } +} - public class LeaderElectionWithFailureDetectorPuppetMultiJvmNode : LeaderElectionSpec +public class LeaderElectionWithFailureDetectorPuppetMultiJvmNode : LeaderElectionSpec +{ + public LeaderElectionWithFailureDetectorPuppetMultiJvmNode() + : base(true, typeof(LeaderElectionWithFailureDetectorPuppetMultiJvmNode)) { - public LeaderElectionWithFailureDetectorPuppetMultiJvmNode() - : base(true, typeof(LeaderElectionWithFailureDetectorPuppetMultiJvmNode)) - { - } } +} - public class LeaderElectionWithAccrualFailureDetectorMultiJvmNode : LeaderElectionSpec +public class LeaderElectionWithAccrualFailureDetectorMultiJvmNode : LeaderElectionSpec +{ + public LeaderElectionWithAccrualFailureDetectorMultiJvmNode() + : base(false, typeof(LeaderElectionWithAccrualFailureDetectorMultiJvmNode)) { - public LeaderElectionWithAccrualFailureDetectorMultiJvmNode() - : base(false, typeof(LeaderElectionWithAccrualFailureDetectorMultiJvmNode)) - { - } } +} - public abstract class LeaderElectionSpec : MultiNodeClusterSpec - { - private readonly LeaderElectionSpecConfig _config; +public abstract class LeaderElectionSpec : MultiNodeClusterSpec +{ + private readonly LeaderElectionSpecConfig _config; - private readonly ImmutableList _sortedRoles; + private readonly ImmutableList _sortedRoles; - protected LeaderElectionSpec(bool 
failureDetectorPuppet, Type type) - : this(new LeaderElectionSpecConfig(failureDetectorPuppet), type) - { + protected LeaderElectionSpec(bool failureDetectorPuppet, Type type) + : this(new LeaderElectionSpecConfig(failureDetectorPuppet), type) + { - } + } - protected LeaderElectionSpec(LeaderElectionSpecConfig config, Type type) - : base(config, type) - { - _config = config; - _sortedRoles = ImmutableList.Create( + protected LeaderElectionSpec(LeaderElectionSpecConfig config, Type type) + : base(config, type) + { + _config = config; + _sortedRoles = ImmutableList.Create( _config.First, _config.Second, _config.Third, _config.Forth) - .Sort(new RoleNameComparer(this)); - } + .Sort(new RoleNameComparer(this)); + } - [MultiNodeFact] - public void LeaderElectionSpecs() + [MultiNodeFact] + public async Task LeaderElectionSpecs() + { + await Cluster_of_four_nodes_must_be_able_to_elect_single_leaderAsync(); + await Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_leftAsync(); + await Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_left_again(); + } + + public async Task Cluster_of_four_nodes_must_be_able_to_elect_single_leaderAsync() + { + await AwaitClusterUpAsync(CancellationToken.None, _config.First, _config.Second, _config.Third, _config.Forth); + + if (Myself != _config.Controller) { - Cluster_of_four_nodes_must_be_able_to_elect_single_leader(); - Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_left(); - Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_left_again(); + ClusterView.IsLeader.ShouldBe(Myself == _sortedRoles.First()); + AssertLeaderIn(_sortedRoles); } - public void Cluster_of_four_nodes_must_be_able_to_elect_single_leader() + await EnterBarrierAsync("after-1"); + } + + public async Task ShutdownLeaderAndVerifyNewLeaderAsync(int alreadyShutdown) + { + var currentRoles = _sortedRoles.Skip(alreadyShutdown).ToList(); + 
currentRoles.Count.ShouldBeGreaterOrEqual(2); + var leader = currentRoles.First(); + var aUser = currentRoles.Last(); + var remainingRoles = currentRoles.Skip(1).ToImmutableList(); + var n = "-" + (alreadyShutdown + 1); + + if (Myself == _config.Controller) + { + await EnterBarrierAsync("before-shutdown" + n); + await TestConductor.ExitAsync(leader, 0); + await EnterBarrierAsync("after-shutdown" + n, "after-unavailable" + n, "after-down" + n, "completed" + n); + } + else if (Myself == leader) + { + await EnterBarrierAsync("before-shutdown" + n, "after-shutdown" + n); + // this node will be shutdown by the controller and doesn't participate in more barriers + } + else if (Myself == aUser) { - AwaitClusterUp(_config.First, _config.Second, _config.Third, _config.Forth); + var leaderAddress = GetAddress(leader); + await EnterBarrierAsync("before-shutdown" + n, "after-shutdown" + n); - if (Myself != _config.Controller) - { - ClusterView.IsLeader.ShouldBe(Myself == _sortedRoles.First()); - AssertLeaderIn(_sortedRoles); - } + // detect failure + MarkNodeAsUnavailable(leaderAddress); + await AwaitAssertAsync(() => ClusterView.UnreachableMembers.Select(x => x.Address).Contains(leaderAddress).ShouldBeTrue()); + await EnterBarrierAsync("after-unavailable" + n); - EnterBarrier("after-1"); - } + // user marks the shutdown leader as DOWN + Cluster.Down(leaderAddress); - public void ShutdownLeaderAndVerifyNewLeader(int alreadyShutdown) + // removed + await AwaitAssertAsync((() => ClusterView.UnreachableMembers.Select(x => x.Address).Contains(leaderAddress).ShouldBeFalse())); + await EnterBarrierAsync("after-down" + n, "completed" + n); + } + else if (remainingRoles.Contains(Myself)) { - var currentRoles = _sortedRoles.Skip(alreadyShutdown).ToList(); - currentRoles.Count.ShouldBeGreaterOrEqual(2); - var leader = currentRoles.First(); - var aUser = currentRoles.Last(); - var remainingRoles = currentRoles.Skip(1).ToImmutableList(); - var n = "-" + (alreadyShutdown + 1); - - if 
(Myself == _config.Controller) - { - EnterBarrier("before-shutdown" + n); - TestConductor.Exit(leader, 0).Wait(); - EnterBarrier("after-shutdown" + n, "after-unavailable" + n, "after-down" + n, "completed" + n); - } - else if (Myself == leader) - { - EnterBarrier("before-shutdown" + n, "after-shutdown" + n); - // this node will be shutdown by the controller and doesn't participate in more barriers - } - else if (Myself == aUser) - { - var leaderAddress = GetAddress(leader); - EnterBarrier("before-shutdown" + n, "after-shutdown" + n); - - // detect failure - MarkNodeAsUnavailable(leaderAddress); - AwaitAssert(() => ClusterView.UnreachableMembers.Select(x => x.Address).Contains(leaderAddress).ShouldBeTrue()); - EnterBarrier("after-unavailable" + n); - - // user marks the shutdown leader as DOWN - Cluster.Down(leaderAddress); - - // removed - AwaitAssert((() => ClusterView.UnreachableMembers.Select(x => x.Address).Contains(leaderAddress).ShouldBeFalse())); - EnterBarrier("after-down" + n, "completed" + n); - } - else if (remainingRoles.Contains(Myself)) - { - // remaining cluster nodes, not shutdown - var leaderAddress = GetAddress(leader); - EnterBarrier("before-shutdown" + n, "after-shutdown" + n); - - AwaitAssert(() => ClusterView.UnreachableMembers.Select(x => x.Address).Contains(leaderAddress).ShouldBeTrue()); - EnterBarrier("after-unavailable" + n); - - EnterBarrier("after-down" + n); - AwaitMembersUp(currentRoles.Count - 1); - var nextExpectedLeader = remainingRoles.First(); - ClusterView.IsLeader.ShouldBe(Myself == nextExpectedLeader); - AssertLeaderIn(remainingRoles); - - EnterBarrier("completed" + n); - } + // remaining cluster nodes, not shutdown + var leaderAddress = GetAddress(leader); + await EnterBarrierAsync("before-shutdown" + n, "after-shutdown" + n); + + await AwaitAssertAsync(() => ClusterView.UnreachableMembers.Select(x => x.Address).Contains(leaderAddress).ShouldBeTrue()); + await EnterBarrierAsync("after-unavailable" + n); + + await 
EnterBarrierAsync("after-down" + n); + await AwaitMembersUpAsync(currentRoles.Count - 1); + var nextExpectedLeader = remainingRoles.First(); + ClusterView.IsLeader.ShouldBe(Myself == nextExpectedLeader); + AssertLeaderIn(remainingRoles); + + await EnterBarrierAsync("completed" + n); } + } - public void Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_left() + public async Task Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_leftAsync() + { + await WithinAsync(TimeSpan.FromSeconds(30), async () => { - Within(TimeSpan.FromSeconds(30), () => - { - ShutdownLeaderAndVerifyNewLeader(0); - EnterBarrier("after-2"); - }); - } + await ShutdownLeaderAndVerifyNewLeaderAsync(0); + await EnterBarrierAsync("after-2"); + }); + } - public void Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_left_again() + public async Task Cluster_of_four_nodes_must_be_able_to_reelect_single_leader_after_leader_has_left_again() + { + await WithinAsync(TimeSpan.FromSeconds(30), async () => { - Within(TimeSpan.FromSeconds(30), () => - { - ShutdownLeaderAndVerifyNewLeader(1); - EnterBarrier("after-3"); - }); - } + await ShutdownLeaderAndVerifyNewLeaderAsync(1); + await EnterBarrierAsync("after-3"); + }); } -} +} \ No newline at end of file diff --git a/src/core/Akka.Cluster.Tests.MultiNode/StressSpec.cs b/src/core/Akka.Cluster.Tests.MultiNode/StressSpec.cs index 34c7b826c73..e9a95fa8395 100644 --- a/src/core/Akka.Cluster.Tests.MultiNode/StressSpec.cs +++ b/src/core/Akka.Cluster.Tests.MultiNode/StressSpec.cs @@ -13,6 +13,7 @@ using System.Runtime.InteropServices; using System.Text; using System.Threading; +using System.Threading.Tasks; using Akka.Actor; using Akka.Cluster.TestKit; using Akka.Configuration; @@ -30,23 +31,23 @@ using Google.Protobuf.WellKnownTypes; using Environment = System.Environment; -namespace Akka.Cluster.Tests.MultiNode +namespace Akka.Cluster.Tests.MultiNode; + +public class StressSpecConfig : 
MultiNodeConfig { - public class StressSpecConfig : MultiNodeConfig + public int TotalNumberOfNodes => Environment.GetEnvironmentVariable("MNTR_STRESSSPEC_NODECOUNT") switch { - public int TotalNumberOfNodes => Environment.GetEnvironmentVariable("MNTR_STRESSSPEC_NODECOUNT") switch - { - string e when string.IsNullOrEmpty(e) => 13, - string val => int.Parse(val), - _ => 13 - }; + string e when string.IsNullOrEmpty(e) => 13, + string val => int.Parse(val), + _ => 13 + }; - public StressSpecConfig() - { - foreach (var i in Enumerable.Range(1, TotalNumberOfNodes)) - Role("node-" + i); + public StressSpecConfig() + { + foreach (var i in Enumerable.Range(1, TotalNumberOfNodes)) + Role("node-" + i); - CommonConfig = ConfigurationFactory.ParseString(@" + CommonConfig = ConfigurationFactory.ParseString(@" akka.test.cluster-stress-spec { infolog = on # scale the nr-of-nodes* settings with this factor @@ -116,1309 +117,1323 @@ public StressSpecConfig() } }"); - TestTransport = true; - } + TestTransport = true; + } - public class Settings - { - private readonly Config _testConfig; + public class Settings + { + private readonly Config _testConfig; - public Settings(Config config, int totalNumberOfNodes) + public Settings(Config config, int totalNumberOfNodes) + { + TotalNumberOfNodes = totalNumberOfNodes; + _testConfig = config.GetConfig("akka.test.cluster-stress-spec"); + Infolog = _testConfig.GetBoolean("infolog"); + NFactor = _testConfig.GetInt("nr-of-nodes-factor"); + NumberOfSeedNodes = _testConfig.GetInt("nr-of-seed-nodes"); + NumberOfNodesJoiningToSeedNodesInitially = + _testConfig.GetInt("nr-of-nodes-joining-to-seed-initially") * NFactor; + NumberOfNodesJoiningOneByOneSmall = _testConfig.GetInt("nr-of-nodes-joining-one-by-one-small") * NFactor; + NumberOfNodesJoiningOneByOneLarge = _testConfig.GetInt("nr-of-nodes-joining-one-by-one-large") * NFactor; + NumberOfNodesJoiningToOneNode = _testConfig.GetInt("nr-of-nodes-joining-to-one") * NFactor; + // remaining will join to 
seed nodes + NumberOfNodesJoiningToSeedNodes = (totalNumberOfNodes - NumberOfSeedNodes - + NumberOfNodesJoiningToSeedNodesInitially - + NumberOfNodesJoiningOneByOneSmall - + NumberOfNodesJoiningOneByOneLarge - NumberOfNodesJoiningToOneNode); + if (NumberOfNodesJoiningToSeedNodes < 0) + throw new ArgumentOutOfRangeException("nr-of-nodes-joining-*", + $"too many configured nr-of-nodes-joining-*, total should be <= {totalNumberOfNodes}"); + + NumberOfNodesLeavingOneByOneSmall = _testConfig.GetInt("nr-of-nodes-leaving-one-by-one-small") * NFactor; + NumberOfNodesLeavingOneByOneLarge = _testConfig.GetInt("nr-of-nodes-leaving-one-by-one-large") * NFactor; + NumberOfNodesLeaving = _testConfig.GetInt("nr-of-nodes-leaving") * NFactor; + NumberOfNodesShutdownOneByOneSmall = _testConfig.GetInt("nr-of-nodes-shutdown-one-by-one-small") * NFactor; + NumberOfNodesShutdownOneByOneLarge = _testConfig.GetInt("nr-of-nodes-shutdown-one-by-one-large") * NFactor; + NumberOfNodesShutdown = _testConfig.GetInt("nr-of-nodes-shutdown") * NFactor; + NumberOfNodesPartition = _testConfig.GetInt("nr-of-nodes-partition") * NFactor; + NumberOfNodesJoinRemove = _testConfig.GetInt("nr-of-nodes-join-remove"); // not scaled by nodes factor + + DFactor = _testConfig.GetInt("duration-factor"); + JoinRemoveDuration = TimeSpan.FromMilliseconds(_testConfig.GetTimeSpan("join-remove-duration").TotalMilliseconds * DFactor); + IdleGossipDuration = TimeSpan.FromMilliseconds(_testConfig.GetTimeSpan("idle-gossip-duration").TotalMilliseconds * DFactor); + ExpectedTestDuration = TimeSpan.FromMilliseconds(_testConfig.GetTimeSpan("expected-test-duration").TotalMilliseconds * DFactor); + ConvergenceWithinFactor = _testConfig.GetDouble("convergence-within-factor"); + + if (NumberOfSeedNodes + NumberOfNodesJoiningToSeedNodesInitially + NumberOfNodesJoiningOneByOneSmall + + NumberOfNodesJoiningOneByOneLarge + NumberOfNodesJoiningToOneNode + + NumberOfNodesJoiningToSeedNodes > totalNumberOfNodes) { - TotalNumberOfNodes = 
totalNumberOfNodes; - _testConfig = config.GetConfig("akka.test.cluster-stress-spec"); - Infolog = _testConfig.GetBoolean("infolog"); - NFactor = _testConfig.GetInt("nr-of-nodes-factor"); - NumberOfSeedNodes = _testConfig.GetInt("nr-of-seed-nodes"); - NumberOfNodesJoiningToSeedNodesInitially = - _testConfig.GetInt("nr-of-nodes-joining-to-seed-initially") * NFactor; - NumberOfNodesJoiningOneByOneSmall = _testConfig.GetInt("nr-of-nodes-joining-one-by-one-small") * NFactor; - NumberOfNodesJoiningOneByOneLarge = _testConfig.GetInt("nr-of-nodes-joining-one-by-one-large") * NFactor; - NumberOfNodesJoiningToOneNode = _testConfig.GetInt("nr-of-nodes-joining-to-one") * NFactor; - // remaining will join to seed nodes - NumberOfNodesJoiningToSeedNodes = (totalNumberOfNodes - NumberOfSeedNodes - - NumberOfNodesJoiningToSeedNodesInitially - - NumberOfNodesJoiningOneByOneSmall - - NumberOfNodesJoiningOneByOneLarge - NumberOfNodesJoiningToOneNode); - if (NumberOfNodesJoiningToSeedNodes < 0) - throw new ArgumentOutOfRangeException("nr-of-nodes-joining-*", - $"too many configured nr-of-nodes-joining-*, total should be <= {totalNumberOfNodes}"); - - NumberOfNodesLeavingOneByOneSmall = _testConfig.GetInt("nr-of-nodes-leaving-one-by-one-small") * NFactor; - NumberOfNodesLeavingOneByOneLarge = _testConfig.GetInt("nr-of-nodes-leaving-one-by-one-large") * NFactor; - NumberOfNodesLeaving = _testConfig.GetInt("nr-of-nodes-leaving") * NFactor; - NumberOfNodesShutdownOneByOneSmall = _testConfig.GetInt("nr-of-nodes-shutdown-one-by-one-small") * NFactor; - NumberOfNodesShutdownOneByOneLarge = _testConfig.GetInt("nr-of-nodes-shutdown-one-by-one-large") * NFactor; - NumberOfNodesShutdown = _testConfig.GetInt("nr-of-nodes-shutdown") * NFactor; - NumberOfNodesPartition = _testConfig.GetInt("nr-of-nodes-partition") * NFactor; - NumberOfNodesJoinRemove = _testConfig.GetInt("nr-of-nodes-join-remove"); // not scaled by nodes factor - - DFactor = _testConfig.GetInt("duration-factor"); - 
JoinRemoveDuration = TimeSpan.FromMilliseconds(_testConfig.GetTimeSpan("join-remove-duration").TotalMilliseconds * DFactor); - IdleGossipDuration = TimeSpan.FromMilliseconds(_testConfig.GetTimeSpan("idle-gossip-duration").TotalMilliseconds * DFactor); - ExpectedTestDuration = TimeSpan.FromMilliseconds(_testConfig.GetTimeSpan("expected-test-duration").TotalMilliseconds * DFactor); - ConvergenceWithinFactor = _testConfig.GetDouble("convergence-within-factor"); - - if (NumberOfSeedNodes + NumberOfNodesJoiningToSeedNodesInitially + NumberOfNodesJoiningOneByOneSmall + - NumberOfNodesJoiningOneByOneLarge + NumberOfNodesJoiningToOneNode + - NumberOfNodesJoiningToSeedNodes > totalNumberOfNodes) - { - throw new ArgumentOutOfRangeException("nr-of-nodes-joining-*", - $"specified number of joining nodes <= {totalNumberOfNodes}"); - } + throw new ArgumentOutOfRangeException("nr-of-nodes-joining-*", + $"specified number of joining nodes <= {totalNumberOfNodes}"); + } - // don't shutdown the 3 nodes hosting the master actors - if (NumberOfNodesLeavingOneByOneSmall + NumberOfNodesLeavingOneByOneLarge + NumberOfNodesLeaving + - NumberOfNodesShutdownOneByOneSmall + NumberOfNodesShutdownOneByOneLarge + NumberOfNodesShutdown > - totalNumberOfNodes - 3) - { - throw new ArgumentOutOfRangeException("nr-of-nodes-leaving-*", - $"specified number of leaving/shutdown nodes <= {totalNumberOfNodes - 3}"); - } + // don't shutdown the 3 nodes hosting the master actors + if (NumberOfNodesLeavingOneByOneSmall + NumberOfNodesLeavingOneByOneLarge + NumberOfNodesLeaving + + NumberOfNodesShutdownOneByOneSmall + NumberOfNodesShutdownOneByOneLarge + NumberOfNodesShutdown > + totalNumberOfNodes - 3) + { + throw new ArgumentOutOfRangeException("nr-of-nodes-leaving-*", + $"specified number of leaving/shutdown nodes <= {totalNumberOfNodes - 3}"); + } - if (NumberOfNodesJoinRemove > totalNumberOfNodes) - { - throw new ArgumentOutOfRangeException("nr-of-nodes-join-remove*", - $"nr-of-nodes-join-remove should 
be <= {totalNumberOfNodes}"); - } + if (NumberOfNodesJoinRemove > totalNumberOfNodes) + { + throw new ArgumentOutOfRangeException("nr-of-nodes-join-remove*", + $"nr-of-nodes-join-remove should be <= {totalNumberOfNodes}"); } + } - public int TotalNumberOfNodes { get; } + public int TotalNumberOfNodes { get; } - public bool Infolog { get; } - public int NFactor { get; } + public bool Infolog { get; } + public int NFactor { get; } - public int NumberOfSeedNodes { get; } + public int NumberOfSeedNodes { get; } - public int NumberOfNodesJoiningToSeedNodesInitially { get; } + public int NumberOfNodesJoiningToSeedNodesInitially { get; } - public int NumberOfNodesJoiningOneByOneSmall { get; } + public int NumberOfNodesJoiningOneByOneSmall { get; } - public int NumberOfNodesJoiningOneByOneLarge { get; } + public int NumberOfNodesJoiningOneByOneLarge { get; } - public int NumberOfNodesJoiningToOneNode { get; } + public int NumberOfNodesJoiningToOneNode { get; } - public int NumberOfNodesJoiningToSeedNodes { get; } + public int NumberOfNodesJoiningToSeedNodes { get; } - public int NumberOfNodesLeavingOneByOneSmall { get; } + public int NumberOfNodesLeavingOneByOneSmall { get; } - public int NumberOfNodesLeavingOneByOneLarge { get; } + public int NumberOfNodesLeavingOneByOneLarge { get; } - public int NumberOfNodesLeaving { get; } + public int NumberOfNodesLeaving { get; } - public int NumberOfNodesShutdownOneByOneSmall { get; } + public int NumberOfNodesShutdownOneByOneSmall { get; } - public int NumberOfNodesShutdownOneByOneLarge { get; } + public int NumberOfNodesShutdownOneByOneLarge { get; } - public int NumberOfNodesShutdown { get; } + public int NumberOfNodesShutdown { get; } - public int NumberOfNodesPartition { get; } + public int NumberOfNodesPartition { get; } - public int NumberOfNodesJoinRemove { get; } + public int NumberOfNodesJoinRemove { get; } - public int DFactor { get; } + public int DFactor { get; } - public TimeSpan JoinRemoveDuration { get; } + public 
TimeSpan JoinRemoveDuration { get; } - public TimeSpan IdleGossipDuration { get; } + public TimeSpan IdleGossipDuration { get; } - public TimeSpan ExpectedTestDuration { get; } + public TimeSpan ExpectedTestDuration { get; } - public double ConvergenceWithinFactor { get; } + public double ConvergenceWithinFactor { get; } - public override string ToString() - { - return _testConfig.WithFallback($"nrOfNodes={TotalNumberOfNodes}").Root.ToString(2); - } + public override string ToString() + { + return _testConfig.WithFallback($"nrOfNodes={TotalNumberOfNodes}").Root.ToString(2); } } +} - internal sealed class ClusterResult +internal sealed class ClusterResult +{ + public ClusterResult(Address address, TimeSpan duration, GossipStats clusterStats) { - public ClusterResult(Address address, TimeSpan duration, GossipStats clusterStats) - { - Address = address; - Duration = duration; - ClusterStats = clusterStats; - } - - public Address Address { get; } - public TimeSpan Duration { get; } - public GossipStats ClusterStats { get; } + Address = address; + Duration = duration; + ClusterStats = clusterStats; } - internal sealed class AggregatedClusterResult + public Address Address { get; } + public TimeSpan Duration { get; } + public GossipStats ClusterStats { get; } +} + +internal sealed class AggregatedClusterResult +{ + public AggregatedClusterResult(string title, TimeSpan duration, GossipStats clusterStats) { - public AggregatedClusterResult(string title, TimeSpan duration, GossipStats clusterStats) - { - Title = title; - Duration = duration; - ClusterStats = clusterStats; - } + Title = title; + Duration = duration; + ClusterStats = clusterStats; + } - public string Title { get; } + public string Title { get; } - public TimeSpan Duration { get; } + public TimeSpan Duration { get; } - public GossipStats ClusterStats { get; } - } + public GossipStats ClusterStats { get; } +} - /// - /// Central aggregator of cluster statistics and metrics. 
- /// - /// Reports the result via log periodically and when all - /// expected results has been collected. It shuts down - /// itself when expected results has been collected. - /// - internal class ClusterResultAggregator : ReceiveActor - { - private readonly string _title; - private readonly int _expectedResults; - private readonly StressSpecConfig.Settings _settings; +/// +/// Central aggregator of cluster statistics and metrics. +/// +/// Reports the result via log periodically and when all +/// expected results has been collected. It shuts down +/// itself when expected results has been collected. +/// +internal class ClusterResultAggregator : ReceiveActor +{ + private readonly string _title; + private readonly int _expectedResults; + private readonly StressSpecConfig.Settings _settings; - private readonly ILoggingAdapter _log = Context.GetLogger(); + private readonly ILoggingAdapter _log = Context.GetLogger(); - private Option _reportTo = Option.None; - private ImmutableList _results = ImmutableList.Empty; - private ImmutableSortedDictionary> _phiValuesObservedByNode = - ImmutableSortedDictionary>.Empty.WithComparers(Member.AddressOrdering); - private ImmutableSortedDictionary _clusterStatsObservedByNode = - ImmutableSortedDictionary.Empty.WithComparers(Member.AddressOrdering); + private Option _reportTo = Option.None; + private ImmutableList _results = ImmutableList.Empty; + private ImmutableSortedDictionary> _phiValuesObservedByNode = + ImmutableSortedDictionary>.Empty.WithComparers(Member.AddressOrdering); + private ImmutableSortedDictionary _clusterStatsObservedByNode = + ImmutableSortedDictionary.Empty.WithComparers(Member.AddressOrdering); - public static readonly string FormatPhiHeader = "[Monitor]\t[Subject]\t[count]\t[count phi > 1.0]\t[max phi]"; + public static readonly string FormatPhiHeader = "[Monitor]\t[Subject]\t[count]\t[count phi > 1.0]\t[max phi]"; - public string FormatPhiLine(Address monitor, Address subject, PhiValue phi) - { - return 
$"{monitor}\t{subject}\t{phi.Count}\t{phi.CountAboveOne}\t{phi.Max:F2}"; - } + public string FormatPhiLine(Address monitor, Address subject, PhiValue phi) + { + return $"{monitor}\t{subject}\t{phi.Count}\t{phi.CountAboveOne}\t{phi.Max:F2}"; + } - public string FormatPhi() + public string FormatPhi() + { + if (_phiValuesObservedByNode.IsEmpty) return string.Empty; + else { - if (_phiValuesObservedByNode.IsEmpty) return string.Empty; - else - { - var lines = (from mon in _phiValuesObservedByNode from phi in mon.Value select FormatPhiLine(mon.Key, phi.Address, phi)); - return FormatPhiHeader + Environment.NewLine + string.Join(Environment.NewLine, lines); - } + var lines = (from mon in _phiValuesObservedByNode from phi in mon.Value select FormatPhiLine(mon.Key, phi.Address, phi)); + return FormatPhiHeader + Environment.NewLine + string.Join(Environment.NewLine, lines); } + } - public TimeSpan MaxDuration => _results.Max(x => x.Duration); + public TimeSpan MaxDuration => _results.Max(x => x.Duration); - public GossipStats TotalGossipStats => - _results.Aggregate(new GossipStats(), (stats, result) => stats += result.ClusterStats); + public GossipStats TotalGossipStats => + _results.Aggregate(new GossipStats(), (stats, result) => stats += result.ClusterStats); - public string FormatStats() + public string FormatStats() + { + string F(ClusterEvent.CurrentInternalStats stats) { - string F(ClusterEvent.CurrentInternalStats stats) - { - return - $"CurrentClusterStats({stats.GossipStats?.ReceivedGossipCount}, {stats.GossipStats?.MergeCount}, " + - $"{stats.GossipStats?.SameCount}, {stats.GossipStats?.NewerCount}, {stats.GossipStats?.OlderCount}," + - $"{stats.SeenBy?.VersionSize}, {stats.SeenBy?.SeenLatest})"; - } - - return string.Join(Environment.NewLine, "ClusterStats(gossip, merge, same, newer, older, vclockSize, seenLatest)" + - Environment.NewLine + - string.Join(Environment.NewLine, _clusterStatsObservedByNode.Select(x => $"{x.Key}\t{F(x.Value)}"))); + return + 
$"CurrentClusterStats({stats.GossipStats?.ReceivedGossipCount}, {stats.GossipStats?.MergeCount}, " + + $"{stats.GossipStats?.SameCount}, {stats.GossipStats?.NewerCount}, {stats.GossipStats?.OlderCount}," + + $"{stats.SeenBy?.VersionSize}, {stats.SeenBy?.SeenLatest})"; } - public ClusterResultAggregator(string title, int expectedResults, StressSpecConfig.Settings settings) + return string.Join(Environment.NewLine, "ClusterStats(gossip, merge, same, newer, older, vclockSize, seenLatest)" + + Environment.NewLine + + string.Join(Environment.NewLine, _clusterStatsObservedByNode.Select(x => $"{x.Key}\t{F(x.Value)}"))); + } + + public ClusterResultAggregator(string title, int expectedResults, StressSpecConfig.Settings settings) + { + _title = title; + _expectedResults = expectedResults; + _settings = settings; + + Receive(phi => { - _title = title; - _expectedResults = expectedResults; - _settings = settings; + _phiValuesObservedByNode = _phiValuesObservedByNode.SetItem(phi.Address, phi.PhiValues); + }); - Receive(phi => - { - _phiValuesObservedByNode = _phiValuesObservedByNode.SetItem(phi.Address, phi.PhiValues); - }); + Receive(stats => + { + _clusterStatsObservedByNode = _clusterStatsObservedByNode.SetItem(stats.Address, stats.Stats); + }); - Receive(stats => + Receive(_ => + { + if (_settings.Infolog) { - _clusterStatsObservedByNode = _clusterStatsObservedByNode.SetItem(stats.Address, stats.Stats); - }); + _log.Info("BEGIN CLUSTER OPERATION: [{0}] in progress" + Environment.NewLine + "{1}" + Environment.NewLine + "{2}", _title, + FormatPhi(), FormatStats()); + } + }); - Receive(_ => + Receive(r => + { + _results = _results.Add(r); + if (_results.Count == _expectedResults) { + var aggregated = new AggregatedClusterResult(_title, MaxDuration, TotalGossipStats); if (_settings.Infolog) { - _log.Info("BEGIN CLUSTER OPERATION: [{0}] in progress" + Environment.NewLine + "{1}" + Environment.NewLine + "{2}", _title, - FormatPhi(), FormatStats()); - } - }); - - Receive(r => - { 
- _results = _results.Add(r); - if (_results.Count == _expectedResults) - { - var aggregated = new AggregatedClusterResult(_title, MaxDuration, TotalGossipStats); - if (_settings.Infolog) - { - _log.Info("END CLUSTER OPERATION: [{0}] completed in [{1}] ms" + Environment.NewLine + "{2}" + - Environment.NewLine + "{3}" + Environment.NewLine + "{4}", _title, aggregated.Duration.TotalMilliseconds, + _log.Info("END CLUSTER OPERATION: [{0}] completed in [{1}] ms" + Environment.NewLine + "{2}" + + Environment.NewLine + "{3}" + Environment.NewLine + "{4}", _title, aggregated.Duration.TotalMilliseconds, aggregated.ClusterStats, FormatPhi(), FormatStats()); - } - _reportTo.OnSuccess(r => r.Tell(aggregated)); - Context.Stop(Self); } - }); + _reportTo.OnSuccess(r => r.Tell(aggregated)); + Context.Stop(Self); + } + }); - Receive(_ => { }); - Receive(re => - { - _reportTo = re.Ref; - }); - } + Receive(_ => { }); + Receive(re => + { + _reportTo = re.Ref; + }); } +} - /// - /// Keeps cluster statistics and metrics reported by . - /// - /// Logs the list of historical results when a new is received. - /// - internal class ClusterResultHistory : ReceiveActor - { - private ILoggingAdapter _log = Context.GetLogger(); - private ImmutableList _history = ImmutableList.Empty; +/// +/// Keeps cluster statistics and metrics reported by . +/// +/// Logs the list of historical results when a new is received. 
+/// +internal class ClusterResultHistory : ReceiveActor +{ + private ILoggingAdapter _log = Context.GetLogger(); + private ImmutableList _history = ImmutableList.Empty; - public ClusterResultHistory() + public ClusterResultHistory() + { + Receive(result => { - Receive(result => - { - _history = _history.Add(result); - }); - } - - public static readonly string FormatHistoryHeader = "[Title]\t[Duration (ms)]\t[GossipStats(gossip, merge, same, newer, older)]"; + _history = _history.Add(result); + }); + } - public string FormatHistoryLine(AggregatedClusterResult result) - { - return $"{result.Title}\t{result.Duration.TotalMilliseconds}\t{result.ClusterStats}"; - } + public static readonly string FormatHistoryHeader = "[Title]\t[Duration (ms)]\t[GossipStats(gossip, merge, same, newer, older)]"; - public string FormatHistory => FormatHistoryHeader + Environment.NewLine + - string.Join(Environment.NewLine, _history.Select(x => FormatHistoryLine(x))); + public string FormatHistoryLine(AggregatedClusterResult result) + { + return $"{result.Title}\t{result.Duration.TotalMilliseconds}\t{result.ClusterStats}"; } - /// - /// Collect phi values of the failure detector and report to the central - /// - internal class PhiObserver : ReceiveActor - { - private readonly Cluster _cluster = Cluster.Get(Context.System); - private readonly ILoggingAdapter _log = Context.GetLogger(); - private ImmutableDictionary _phiByNode = ImmutableDictionary.Empty; + public string FormatHistory => FormatHistoryHeader + Environment.NewLine + + string.Join(Environment.NewLine, _history.Select(x => FormatHistoryLine(x))); +} + +/// +/// Collect phi values of the failure detector and report to the central +/// +internal class PhiObserver : ReceiveActor +{ + private readonly Cluster _cluster = Cluster.Get(Context.System); + private readonly ILoggingAdapter _log = Context.GetLogger(); + private ImmutableDictionary _phiByNode = ImmutableDictionary.Empty; - private Option _reportTo = Option.None; - private 
HashSet
_nodes = new(); + private Option _reportTo = Option.None; + private HashSet
_nodes = new(); - private ICancelable _checkPhiTask = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable( - TimeSpan.FromSeconds(1), - TimeSpan.FromSeconds(1), Context.Self, PhiTick.Instance, ActorRefs.NoSender); + private ICancelable _checkPhiTask = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable( + TimeSpan.FromSeconds(1), + TimeSpan.FromSeconds(1), Context.Self, PhiTick.Instance, ActorRefs.NoSender); - private double Phi(Address address) + private double Phi(Address address) + { + return _cluster.FailureDetector switch { - return _cluster.FailureDetector switch + DefaultFailureDetectorRegistry
reg => (reg.GetFailureDetector(address)) switch { - DefaultFailureDetectorRegistry
reg => (reg.GetFailureDetector(address)) switch - { - PhiAccrualFailureDetector fd => fd.CurrentPhi, - _ => 0.0d - }, + PhiAccrualFailureDetector fd => fd.CurrentPhi, _ => 0.0d - }; - } + }, + _ => 0.0d + }; + } - private PhiValue PhiByNodeDefault(Address address) + private PhiValue PhiByNodeDefault(Address address) + { + if (!_phiByNode.ContainsKey(address)) { - if (!_phiByNode.ContainsKey(address)) - { - // populate default value - _phiByNode = _phiByNode.Add(address, new PhiValue(address, 0, 0, 0.0d)); - } - - return _phiByNode[address]; + // populate default value + _phiByNode = _phiByNode.Add(address, new PhiValue(address, 0, 0, 0.0d)); } - public PhiObserver() + return _phiByNode[address]; + } + + public PhiObserver() + { + Receive(_ => { - Receive(_ => + foreach (var node in _nodes) { - foreach (var node in _nodes) - { - var previous = PhiByNodeDefault(node); - var p = Phi(node); + var previous = PhiByNodeDefault(node); + var p = Phi(node); - if (p > 0 || _cluster.FailureDetector.IsMonitoring(node)) + if (p > 0 || _cluster.FailureDetector.IsMonitoring(node)) + { + if (double.IsInfinity(p)) { - if (double.IsInfinity(p)) + _log.Warning("Detected phi value of infinity for [{0}] - ", node); + var (history, time) = _cluster.FailureDetector.GetFailureDetector(node) switch { - _log.Warning("Detected phi value of infinity for [{0}] - ", node); - var (history, time) = _cluster.FailureDetector.GetFailureDetector(node) switch - { - PhiAccrualFailureDetector fd => (fd.State.History, fd.State.TimeStamp), - _ => (HeartbeatHistory.Apply(1), null) - }; - _log.Warning("PhiValues: (Timestamp={0}, Mean={1}, Variance={2}, StdDeviation={3}, Intervals=[{4}])",time, - history.Mean, history.Variance, history.StdDeviation, - string.Join(",", history.Intervals)); - } - - var aboveOne = !double.IsInfinity(p) && p > 1.0d ? 
1 : 0; - _phiByNode = _phiByNode.SetItem(node, new PhiValue(node, - previous.CountAboveOne + aboveOne, - previous.Count + 1, - Math.Max(previous.Max, p))); + PhiAccrualFailureDetector fd => (fd.State.History, fd.State.TimeStamp), + _ => (HeartbeatHistory.Apply(1), null) + }; + _log.Warning("PhiValues: (Timestamp={0}, Mean={1}, Variance={2}, StdDeviation={3}, Intervals=[{4}])",time, + history.Mean, history.Variance, history.StdDeviation, + string.Join(",", history.Intervals)); } - } - - var phiSet = _phiByNode.Values.ToImmutableSortedSet(); - _reportTo.OnSuccess(r => r.Tell(new PhiResult(_cluster.SelfAddress, phiSet))); - }); - Receive(state => - { - _nodes = new HashSet
(state.Members.Select(x => x.Address)); - }); + var aboveOne = !double.IsInfinity(p) && p > 1.0d ? 1 : 0; + _phiByNode = _phiByNode.SetItem(node, new PhiValue(node, + previous.CountAboveOne + aboveOne, + previous.Count + 1, + Math.Max(previous.Max, p))); + } + } - Receive(m => - { - _nodes.Add(m.Member.Address); - }); + var phiSet = _phiByNode.Values.ToImmutableSortedSet(); + _reportTo.OnSuccess(r => r.Tell(new PhiResult(_cluster.SelfAddress, phiSet))); + }); - Receive(r => - { - _reportTo.OnSuccess(o => Context.Unwatch(o)); - _reportTo = r.Ref; - _reportTo.OnSuccess(n => Context.Watch(n)); - }); + Receive(state => + { + _nodes = new HashSet
(state.Members.Select(x => x.Address)); + }); - Receive(_ => - { - if (_reportTo.HasValue) - _reportTo = Option.None; - }); + Receive(m => + { + _nodes.Add(m.Member.Address); + }); - Receive(_ => - { - _phiByNode = ImmutableDictionary.Empty; - _nodes.Clear(); - _cluster.Unsubscribe(Self); - _cluster.Subscribe(Self, typeof(ClusterEvent.IMemberEvent)); - }); - } + Receive(r => + { + _reportTo.OnSuccess(o => Context.Unwatch(o)); + _reportTo = r.Ref; + _reportTo.OnSuccess(n => Context.Watch(n)); + }); - protected override void PreStart() + Receive(_ => { - _cluster.Subscribe(Self, typeof(ClusterEvent.IMemberEvent)); - } + if (_reportTo.HasValue) + _reportTo = Option.None; + }); - protected override void PostStop() + Receive(_ => { + _phiByNode = ImmutableDictionary.Empty; + _nodes.Clear(); _cluster.Unsubscribe(Self); - _checkPhiTask.Cancel(); - base.PostStop(); - } + _cluster.Subscribe(Self, typeof(ClusterEvent.IMemberEvent)); + }); } - internal readonly struct PhiValue : IComparable + protected override void PreStart() { - public PhiValue(Address address, int countAboveOne, int count, double max) - { - Address = address; - CountAboveOne = countAboveOne; - Count = count; - Max = max; - } - - public Address Address { get; } - public int CountAboveOne { get; } - public int Count { get; } - public double Max { get; } + _cluster.Subscribe(Self, typeof(ClusterEvent.IMemberEvent)); + } - public int CompareTo(PhiValue other) - { - return Member.AddressOrdering.Compare(Address, other.Address); - } + protected override void PostStop() + { + _cluster.Unsubscribe(Self); + _checkPhiTask.Cancel(); + base.PostStop(); } +} - internal readonly struct PhiResult +internal readonly struct PhiValue : IComparable +{ + public PhiValue(Address address, int countAboveOne, int count, double max) { - public PhiResult(Address address, ImmutableSortedSet phiValues) - { - Address = address; - PhiValues = phiValues; - } + Address = address; + CountAboveOne = countAboveOne; + Count = count; + Max = 
max; + } - public Address Address { get; } + public Address Address { get; } + public int CountAboveOne { get; } + public int Count { get; } + public double Max { get; } - public ImmutableSortedSet PhiValues { get; } + public int CompareTo(PhiValue other) + { + return Member.AddressOrdering.Compare(Address, other.Address); } +} - internal class StatsObserver : ReceiveActor +internal readonly struct PhiResult +{ + public PhiResult(Address address, ImmutableSortedSet phiValues) { - private readonly Cluster _cluster = Cluster.Get(Context.System); - private Option _reportTo = Option.None; - private Option _startStats = Option.None; + Address = address; + PhiValues = phiValues; + } - protected override void PreStart() - { - _cluster.Subscribe(Self, typeof(ClusterEvent.CurrentInternalStats)); - } + public Address Address { get; } - protected override void PostStop() - { - _cluster.Unsubscribe(Self); - } + public ImmutableSortedSet PhiValues { get; } +} - public StatsObserver() - { - Receive(stats => - { - var gossipStats = stats.GossipStats; - var vclockStats = stats.SeenBy; +internal class StatsObserver : ReceiveActor +{ + private readonly Cluster _cluster = Cluster.Get(Context.System); + private Option _reportTo = Option.None; + private Option _startStats = Option.None; - GossipStats MatchStats() - { - if (!_startStats.HasValue) - { - _startStats = gossipStats; - return gossipStats; - } + protected override void PreStart() + { + _cluster.Subscribe(Self, typeof(ClusterEvent.CurrentInternalStats)); + } - return gossipStats -_startStats.Value; - } + protected override void PostStop() + { + _cluster.Unsubscribe(Self); + } - var diff = MatchStats(); - var res = new StatsResult(_cluster.SelfAddress, new ClusterEvent.CurrentInternalStats(diff, vclockStats)); - _reportTo.OnSuccess(a => a.Tell(res)); - }); + public StatsObserver() + { + Receive(stats => + { + var gossipStats = stats.GossipStats; + var vclockStats = stats.SeenBy; - Receive(r => + GossipStats MatchStats() { - 
_reportTo.OnSuccess(o => Context.Unwatch(o)); - _reportTo = r.Ref; - _reportTo.OnSuccess(n => Context.Watch(n)); - }); + if (!_startStats.HasValue) + { + _startStats = gossipStats; + return gossipStats; + } - Receive(_ => - { - if (_reportTo.HasValue) - _reportTo = Option.None; - }); + return gossipStats -_startStats.Value; + } - Receive(_ => - { - _startStats = Option.None; - }); + var diff = MatchStats(); + var res = new StatsResult(_cluster.SelfAddress, new ClusterEvent.CurrentInternalStats(diff, vclockStats)); + _reportTo.OnSuccess(a => a.Tell(res)); + }); - // nothing interesting here - Receive(_ => { }); - } - } + Receive(r => + { + _reportTo.OnSuccess(o => Context.Unwatch(o)); + _reportTo = r.Ref; + _reportTo.OnSuccess(n => Context.Watch(n)); + }); - /// - /// Used for remote death watch testing - /// - internal class Watchee : ActorBase - { - protected override bool Receive(object message) + Receive(_ => { - return true; - } + if (_reportTo.HasValue) + _reportTo = Option.None; + }); + + Receive(_ => + { + _startStats = Option.None; + }); + + // nothing interesting here + Receive(_ => { }); } +} - internal sealed class Begin +/// +/// Used for remote death watch testing +/// +internal class Watchee : ActorBase +{ + protected override bool Receive(object message) { - public static readonly Begin Instance = new(); - private Begin() { } + return true; } +} + +internal sealed class Begin +{ + public static readonly Begin Instance = new(); + private Begin() { } +} + +internal sealed class End +{ + public static readonly End Instance = new(); + private End() { } +} + +internal sealed class RetryTick +{ + public static readonly RetryTick Instance = new(); + private RetryTick() { } +} + +internal sealed class ReportTick +{ + public static readonly ReportTick Instance = new(); + private ReportTick() { } +} + +internal sealed class PhiTick +{ + public static readonly PhiTick Instance = new(); + private PhiTick() { } +} - internal sealed class End +internal sealed 
class ReportTo +{ + public ReportTo(Option @ref) { - public static readonly End Instance = new(); - private End() { } + Ref = @ref; } - internal sealed class RetryTick + public Option Ref { get; } +} + +internal sealed class StatsResult +{ + public StatsResult(Address address, ClusterEvent.CurrentInternalStats stats) { - public static readonly RetryTick Instance = new(); - private RetryTick() { } + Address = address; + Stats = stats; } - internal sealed class ReportTick + public Address Address { get; } + + public Akka.Cluster.ClusterEvent.CurrentInternalStats Stats { get; } +} + +internal sealed class Reset +{ + public static readonly Reset Instance = new(); + private Reset() { } +} + +internal class MeasureDurationUntilDown : ReceiveActor +{ + private readonly Cluster _cluster = Cluster.Get(Context.System); + private readonly long _startTime; + private readonly ILoggingAdapter _log = Context.GetLogger(); + public MeasureDurationUntilDown() { - public static readonly ReportTick Instance = new(); - private ReportTick() { } + _startTime = MonotonicClock.GetTicks(); + + Receive(d => + { + var m = d.Member; + if (m.UniqueAddress == _cluster.SelfUniqueAddress) + { + _log.Info("Downed [{0}] after [{1} ms]", _cluster.SelfAddress, TimeSpan.FromTicks(MonotonicClock.GetTicks() - _startTime).TotalMilliseconds); + } + }); + + Receive(_ => { }); } - internal sealed class PhiTick + protected override void PreStart() { - public static readonly PhiTick Instance = new(); - private PhiTick() { } + _cluster.Subscribe(Self, ClusterEvent.SubscriptionInitialStateMode.InitialStateAsSnapshot, typeof(ClusterEvent.MemberDowned)); } +} - internal sealed class ReportTo - { - public ReportTo(Option @ref) - { - Ref = @ref; - } +public class StressSpec : MultiNodeClusterSpec +{ + public StressSpecConfig.Settings Settings { get; } + public TestProbe IdentifyProbe; + + protected override TimeSpan ShutdownTimeout => Dilated(TimeSpan.FromSeconds(30)); - public Option Ref { get; } + public int Step 
= 0; + public int NbrUsedRoles = 0; + + public override void MuteLog(ActorSystem sys = null) + { + sys ??= Sys; + base.MuteLog(sys); + Sys.EventStream.Publish(new Mute(new ErrorFilter(typeof(ApplicationException), new ContainsString("Simulated exception")))); + MuteDeadLetters(sys, typeof(AggregatedClusterResult), typeof(StatsResult), typeof(PhiResult), typeof(RetryTick)); } - internal sealed class StatsResult + public StressSpec() : this(new StressSpecConfig()){ } + + protected StressSpec(StressSpecConfig config) : base(config, typeof(StressSpec)) { - public StatsResult(Address address, ClusterEvent.CurrentInternalStats stats) + Settings = new StressSpecConfig.Settings(Sys.Settings.Config, config.TotalNumberOfNodes); + ClusterResultHistory = new Lazy(() => { - Address = address; - Stats = stats; - } + if (Settings.Infolog) + return Sys.ActorOf(Props.Create(() => new ClusterResultHistory()), "resultHistory"); + return Sys.DeadLetters; + }); - public Address Address { get; } + PhiObserver = new Lazy(() => + { + return Sys.ActorOf(Props.Create(() => new PhiObserver()), "phiObserver"); + }); - public Akka.Cluster.ClusterEvent.CurrentInternalStats Stats { get; } + StatsObserver = new Lazy(() => + { + return Sys.ActorOf(Props.Create(() => new StatsObserver()), "statsObserver"); + }); } - internal sealed class Reset + protected override void AtStartup() { - public static readonly Reset Instance = new(); - private Reset() { } + IdentifyProbe = CreateTestProbe(); + base.AtStartup(); } - internal class MeasureDurationUntilDown : ReceiveActor + public string ClrInfo() { - private readonly Cluster _cluster = Cluster.Get(Context.System); - private readonly long _startTime; - private readonly ILoggingAdapter _log = Context.GetLogger(); - public MeasureDurationUntilDown() - { - _startTime = MonotonicClock.GetTicks(); + var sb = new StringBuilder(); + sb.Append("Operating System: ") + .Append(Environment.OSVersion.Platform) + .Append(", ") + 
.Append(RuntimeInformation.ProcessArchitecture.ToString()) + .Append(", ") + .Append(Environment.OSVersion.VersionString) + .AppendLine(); + + sb.Append("CLR: ") + .Append(RuntimeInformation.FrameworkDescription) + .AppendLine(); + + sb.Append("Processors: ").Append(Environment.ProcessorCount) + .AppendLine() + .Append("Load average: ").Append("can't be easily measured on .NET Core") // TODO: fix + .AppendLine() + .Append("Thread count: ") + .Append(Process.GetCurrentProcess().Threads.Count) + .AppendLine(); + + sb.Append("Memory: ") + .Append(" (") + .Append(Process.GetCurrentProcess().WorkingSet64 / 1024 / 1024) + .Append(" - ") + .Append(Process.GetCurrentProcess().PeakWorkingSet64 / 1024 / 1024) + .Append(") MB [working set / peak working set]"); + + sb.AppendLine("Args: ").Append(string.Join(Environment.NewLine, Environment.GetCommandLineArgs())) + .AppendLine(); + + return sb.ToString(); + } - Receive(d => - { - var m = d.Member; - if (m.UniqueAddress == _cluster.SelfUniqueAddress) - { - _log.Info("Downed [{0}] after [{1} ms]", _cluster.SelfAddress, TimeSpan.FromTicks(MonotonicClock.GetTicks() - _startTime).TotalMilliseconds); - } - }); + public ImmutableList SeedNodes => Roles.Take(Settings.NumberOfSeedNodes).ToImmutableList(); - Receive(_ => { }); - } + internal GossipStats LatestGossipStats => Cluster.ReadView.LatestStats.GossipStats; - protected override void PreStart() - { - _cluster.Subscribe(Self, ClusterEvent.SubscriptionInitialStateMode.InitialStateAsSnapshot, typeof(ClusterEvent.MemberDowned)); - } - } + public Lazy ClusterResultHistory { get; } + + public Lazy PhiObserver { get; } - public class StressSpec : MultiNodeClusterSpec + public Lazy StatsObserver { get; } + + public Option ClusterResultAggregator() { - public StressSpecConfig.Settings Settings { get; } - public TestProbe IdentifyProbe; + Sys.ActorSelection(new RootActorPath(GetAddress(Roles.First())) / "user" / ("result" + Step)) + .Tell(new Identify(Step), IdentifyProbe.Ref); + return 
Option.Create(IdentifyProbe.ExpectMsg().Subject); + } - protected override TimeSpan ShutdownTimeout => Dilated(TimeSpan.FromSeconds(30)); + public async Task CreateResultAggregatorAsync(string title, int expectedResults, bool includeInHistory) + { + RunOn(() => + { + var aggregator = Sys.ActorOf( + Props.Create(() => new ClusterResultAggregator(title, expectedResults, Settings)) + .WithDeploy(Deploy.Local), "result" + Step); - public int Step = 0; - public int NbrUsedRoles = 0; + if (includeInHistory && Settings.Infolog) + { + aggregator.Tell(new ReportTo(Option.Create(ClusterResultHistory.Value))); + } + else + { + aggregator.Tell(new ReportTo(Option.None)); + } + }, + Roles.First()); + await EnterBarrierAsync("result-aggregator-created-" + Step); - public override void MuteLog(ActorSystem sys = null) + RunOn(() => { - sys ??= Sys; - base.MuteLog(sys); - Sys.EventStream.Publish(new Mute(new ErrorFilter(typeof(ApplicationException), new ContainsString("Simulated exception")))); - MuteDeadLetters(sys, typeof(AggregatedClusterResult), typeof(StatsResult), typeof(PhiResult), typeof(RetryTick)); - } + var resultAggregator = ClusterResultAggregator(); + PhiObserver.Value.Tell(new ReportTo(resultAggregator)); + StatsObserver.Value.Tell(Reset.Instance); + StatsObserver.Value.Tell(new ReportTo(resultAggregator)); + }, Roles.Take(NbrUsedRoles).ToArray()); - public StressSpec() : this(new StressSpecConfig()){ } + } - protected StressSpec(StressSpecConfig config) : base(config, typeof(StressSpec)) + public async Task AwaitClusterResultAsync() + { + RunOn(() => { - Settings = new StressSpecConfig.Settings(Sys.Settings.Config, config.TotalNumberOfNodes); - ClusterResultHistory = new Lazy(() => + ClusterResultAggregator().OnSuccess(r => { - if (Settings.Infolog) - return Sys.ActorOf(Props.Create(() => new ClusterResultHistory()), "resultHistory"); - return Sys.DeadLetters; + Watch(r); + ExpectMsg(t => t.ActorRef.Path == r.Path); }); + }, Roles.First()); + await 
EnterBarrierAsync("cluster-result-done-" + Step); + } - PhiObserver = new Lazy(() => - { - return Sys.ActorOf(Props.Create(() => new PhiObserver()), "phiObserver"); - }); - - StatsObserver = new Lazy(() => - { - return Sys.ActorOf(Props.Create(() => new StatsObserver()), "statsObserver"); - }); - } - - protected override void AtStartup() - { - IdentifyProbe = CreateTestProbe(); - base.AtStartup(); - } - - public string ClrInfo() - { - var sb = new StringBuilder(); - sb.Append("Operating System: ") - .Append(Environment.OSVersion.Platform) - .Append(", ") - .Append(RuntimeInformation.ProcessArchitecture.ToString()) - .Append(", ") - .Append(Environment.OSVersion.VersionString) - .AppendLine(); - - sb.Append("CLR: ") - .Append(RuntimeInformation.FrameworkDescription) - .AppendLine(); - - sb.Append("Processors: ").Append(Environment.ProcessorCount) - .AppendLine() - .Append("Load average: ").Append("can't be easily measured on .NET Core") // TODO: fix - .AppendLine() - .Append("Thread count: ") - .Append(Process.GetCurrentProcess().Threads.Count) - .AppendLine(); - - sb.Append("Memory: ") - .Append(" (") - .Append(Process.GetCurrentProcess().WorkingSet64 / 1024 / 1024) - .Append(" - ") - .Append(Process.GetCurrentProcess().PeakWorkingSet64 / 1024 / 1024) - .Append(") MB [working set / peak working set]"); - - sb.AppendLine("Args: ").Append(string.Join(Environment.NewLine, Environment.GetCommandLineArgs())) - .AppendLine(); - - return sb.ToString(); - } - - public ImmutableList SeedNodes => Roles.Take(Settings.NumberOfSeedNodes).ToImmutableList(); - - internal GossipStats LatestGossipStats => Cluster.ReadView.LatestStats.GossipStats; - - public Lazy ClusterResultHistory { get; } - - public Lazy PhiObserver { get; } - - public Lazy StatsObserver { get; } - - public Option ClusterResultAggregator() + public async Task JoinOneByOneAsync(int numberOfNodes) + { + foreach (var i in Enumerable.Range(0, numberOfNodes)) { - Sys.ActorSelection(new 
RootActorPath(GetAddress(Roles.First())) / "user" / ("result" + Step)) - .Tell(new Identify(Step), IdentifyProbe.Ref); - return Option.Create(IdentifyProbe.ExpectMsg().Subject); + await JoinOneAsync(); + NbrUsedRoles += 1; + Step += 1; } + } - public void CreateResultAggregator(string title, int expectedResults, bool includeInHistory) - { - RunOn(() => - { - var aggregator = Sys.ActorOf( - Props.Create(() => new ClusterResultAggregator(title, expectedResults, Settings)) - .WithDeploy(Deploy.Local), "result" + Step); - - if (includeInHistory && Settings.Infolog) - { - aggregator.Tell(new ReportTo(Option.Create(ClusterResultHistory.Value))); - } - else - { - aggregator.Tell(new ReportTo(Option.None)); - } - }, - Roles.First()); - EnterBarrier("result-aggregator-created-" + Step); - - RunOn(() => - { - var resultAggregator = ClusterResultAggregator(); - PhiObserver.Value.Tell(new ReportTo(resultAggregator)); - StatsObserver.Value.Tell(Reset.Instance); - StatsObserver.Value.Tell(new ReportTo(resultAggregator)); - }, Roles.Take(NbrUsedRoles).ToArray()); - - } + public TimeSpan ConvergenceWithin(TimeSpan baseDuration, int nodes) + { + return TimeSpan.FromMilliseconds(baseDuration.TotalMilliseconds * Settings.ConvergenceWithinFactor * nodes); + } - public void AwaitClusterResult() + public async Task JoinOneAsync() + { + await WithinAsync(TimeSpan.FromSeconds(5) + ConvergenceWithin(TimeSpan.FromSeconds(2), NbrUsedRoles + 1), async () => { - RunOn(() => + var currentRoles = Roles.Take(NbrUsedRoles + 1).ToArray(); + var title = $"join one to {NbrUsedRoles} nodes cluster"; + await CreateResultAggregatorAsync(title, expectedResults: currentRoles.Length, includeInHistory: true); + await RunOnAsync(async () => { - ClusterResultAggregator().OnSuccess(r => + await ReportResult(async () => { - Watch(r); - ExpectMsg(t => t.ActorRef.Path == r.Path); + await RunOnAsync(async () => + { + await Cluster.JoinAsync(GetAddress(Roles.First())); + }, currentRoles.Last()); + await 
AwaitMembersUpAsync(currentRoles.Length, timeout: RemainingOrDefault); + return true; }); - }, Roles.First()); - EnterBarrier("cluster-result-done-" + Step); - } - - public void JoinOneByOne(int numberOfNodes) - { - foreach (var i in Enumerable.Range(0, numberOfNodes)) - { - JoinOne(); - NbrUsedRoles += 1; - Step += 1; - } - } + }, currentRoles); + await AwaitClusterResultAsync(); + await EnterBarrierAsync("join-one-" + Step); + }); + } - public TimeSpan ConvergenceWithin(TimeSpan baseDuration, int nodes) + public async Task JoinSeveralAsync(int numberOfNodes, bool toSeedNodes) + { + string FormatSeedJoin() { - return TimeSpan.FromMilliseconds(baseDuration.TotalMilliseconds * Settings.ConvergenceWithinFactor * nodes); + return toSeedNodes ? "seed nodes" : "one node"; } - public void JoinOne() - { - Within(TimeSpan.FromSeconds(5) + ConvergenceWithin(TimeSpan.FromSeconds(2), NbrUsedRoles + 1), () => + await WithinAsync(TimeSpan.FromSeconds(10) + ConvergenceWithin(TimeSpan.FromSeconds(3), NbrUsedRoles + numberOfNodes), + async () => { - var currentRoles = Roles.Take(NbrUsedRoles + 1).ToArray(); - var title = $"join one to {NbrUsedRoles} nodes cluster"; - CreateResultAggregator(title, expectedResults: currentRoles.Length, includeInHistory: true); - RunOn(() => + var currentRoles = Roles.Take(NbrUsedRoles + numberOfNodes).ToArray(); + var joiningRoles = currentRoles.Skip(NbrUsedRoles).ToArray(); + var title = $"join {numberOfNodes} to {FormatSeedJoin()}, in {NbrUsedRoles} nodes cluster"; + await CreateResultAggregatorAsync(title, expectedResults: currentRoles.Length, true); + await RunOnAsync(async () => { - ReportResult(() => + await ReportResult(async () => { RunOn(() => { - Cluster.Join(GetAddress(Roles.First())); - }, currentRoles.Last()); - AwaitMembersUp(currentRoles.Length, timeout: RemainingOrDefault); + if (toSeedNodes) + { + Cluster.JoinSeedNodes(SeedNodes.Select(GetAddress)); + } + else + { + Cluster.Join(GetAddress(Roles.First())); + } + }, joiningRoles); + 
await AwaitMembersUpAsync(currentRoles.Length, timeout: RemainingOrDefault); return true; }); }, currentRoles); - AwaitClusterResult(); - EnterBarrier("join-one-" + Step); + await AwaitClusterResultAsync(); + await EnterBarrierAsync("join-several-" + Step); }); - } + } - public void JoinSeveral(int numberOfNodes, bool toSeedNodes) + public async Task RemoveOneByOne(int numberOfNodes, bool shutdown) + { + foreach (var i in Enumerable.Range(0, numberOfNodes)) { - string FormatSeedJoin() - { - return toSeedNodes ? "seed nodes" : "one node"; - } - - Within(TimeSpan.FromSeconds(10) + ConvergenceWithin(TimeSpan.FromSeconds(3), NbrUsedRoles + numberOfNodes), - () => - { - var currentRoles = Roles.Take(NbrUsedRoles + numberOfNodes).ToArray(); - var joiningRoles = currentRoles.Skip(NbrUsedRoles).ToArray(); - var title = $"join {numberOfNodes} to {FormatSeedJoin()}, in {NbrUsedRoles} nodes cluster"; - CreateResultAggregator(title, expectedResults: currentRoles.Length, true); - RunOn(() => - { - ReportResult(() => - { - RunOn(() => - { - if (toSeedNodes) - { - Cluster.JoinSeedNodes(SeedNodes.Select(x => GetAddress(x))); - } - else - { - Cluster.Join(GetAddress(Roles.First())); - } - }, joiningRoles); - AwaitMembersUp(currentRoles.Length, timeout: RemainingOrDefault); - return true; - }); - }, currentRoles); - AwaitClusterResult(); - EnterBarrier("join-several-" + Step); - }); + await RemoveOneAsync(shutdown); + NbrUsedRoles -= 1; + Step += 1; } + } - public void RemoveOneByOne(int numberOfNodes, bool shutdown) + public async Task RemoveOneAsync(bool shutdown) + { + string FormatNodeLeave() { - foreach (var i in Enumerable.Range(0, numberOfNodes)) - { - RemoveOne(shutdown); - NbrUsedRoles -= 1; - Step += 1; - } + return shutdown ? 
"shutdown" : "remove"; } - public void RemoveOne(bool shutdown) + await WithinAsync(TimeSpan.FromSeconds(25) + ConvergenceWithin(TimeSpan.FromSeconds(3), NbrUsedRoles - 1), async () => { - string FormatNodeLeave() + var currentRoles = Roles.Take(NbrUsedRoles - 1).ToArray(); + var title = $"{FormatNodeLeave()} one from {NbrUsedRoles} nodes cluster"; + await CreateResultAggregatorAsync(title, expectedResults:currentRoles.Length, true); + + var removeRole = Roles[NbrUsedRoles - 1]; + var removeAddress = GetAddress(removeRole); + Console.WriteLine($"Preparing to {FormatNodeLeave()}[{removeAddress}] role [{removeRole.Name}] out of [{Roles.Count}]"); + RunOn(() => { - return shutdown ? "shutdown" : "remove"; - } + var watchee = Sys.ActorOf(Props.Create(() => new Watchee()), "watchee"); + Console.WriteLine("Created watchee [{0}]", watchee); + }, removeRole); + + await EnterBarrierAsync("watchee-created-" + Step); - Within(TimeSpan.FromSeconds(25) + ConvergenceWithin(TimeSpan.FromSeconds(3), NbrUsedRoles - 1), () - => + await RunOnAsync(async () => { - var currentRoles = Roles.Take(NbrUsedRoles - 1).ToArray(); - var title = $"{FormatNodeLeave()} one from {NbrUsedRoles} nodes cluster"; - CreateResultAggregator(title, expectedResults:currentRoles.Length, true); - - var removeRole = Roles[NbrUsedRoles - 1]; - var removeAddress = GetAddress(removeRole); - Console.WriteLine($"Preparing to {FormatNodeLeave()}[{removeAddress}] role [{removeRole.Name}] out of [{Roles.Count}]"); - RunOn(() => + await AwaitAssertAsync(async () => { - var watchee = Sys.ActorOf(Props.Create(() => new Watchee()), "watchee"); - Console.WriteLine("Created watchee [{0}]", watchee); - }, removeRole); + Sys.ActorSelection(new RootActorPath(removeAddress) / "user" / "watchee").Tell(new Identify("watchee"), IdentifyProbe.Ref); + var watchee = (await IdentifyProbe.ExpectMsgAsync(TimeSpan.FromSeconds(1))).Subject; + await WatchAsync(watchee); + }, interval:TimeSpan.FromSeconds(1.25d)); + + }, Roles.First()); + 
await EnterBarrierAsync("watchee-established-" + Step); - EnterBarrier("watchee-created-" + Step); + RunOn(() => + { + if (!shutdown) + Cluster.Leave(GetAddress(Myself)); + }, removeRole); - RunOn(() => + await RunOnAsync(async () => + { + await ReportResult(async () => { - AwaitAssert(() => + await RunOnAsync(async () => { - Sys.ActorSelection(new RootActorPath(removeAddress) / "user" / "watchee").Tell(new Identify("watchee"), IdentifyProbe.Ref); - var watchee = IdentifyProbe.ExpectMsg(TimeSpan.FromSeconds(1)).Subject; - Watch(watchee); - }, interval:TimeSpan.FromSeconds(1.25d)); - - }, Roles.First()); - EnterBarrier("watchee-established-" + Step); + if (shutdown) + { + if (Settings.Infolog) + { + Log.Info("Shutting down [{0}]", removeAddress); + } + + await TestConductor.ExitAsync(removeRole, 0); + } + }, Roles.First()); + + await AwaitMembersUpAsync(currentRoles.Length, timeout: RemainingOrDefault); + await AwaitAllReachableAsync(); + return true; + }); + }, currentRoles); + + await RunOnAsync(async () => + { + var expectedPath = new RootActorPath(removeAddress) / "user" / "watchee"; + await ExpectMsgAsync(t => t.ActorRef.Path == expectedPath); + }, Roles.First()); + + await EnterBarrierAsync("watch-verified-" + Step); + + await AwaitClusterResultAsync(); + await EnterBarrierAsync("remove-one-" + Step); + }); + } + + public async Task RemoveSeveralAsync(int numberOfNodes, bool shutdown) + { + string FormatNodeLeave() + { + return shutdown ? 
"shutdown" : "remove"; + } + + await WithinAsync(TimeSpan.FromSeconds(25) + ConvergenceWithin(TimeSpan.FromSeconds(5), NbrUsedRoles - numberOfNodes), + async () => + { + var currentRoles = Roles.Take(NbrUsedRoles - numberOfNodes).ToArray(); + var removeRoles = Roles.Skip(currentRoles.Length).Take(numberOfNodes).ToArray(); + var title = $"{FormatNodeLeave()} {numberOfNodes} in {NbrUsedRoles} nodes cluster"; + await CreateResultAggregatorAsync(title, expectedResults: currentRoles.Length, includeInHistory: true); RunOn(() => { if (!shutdown) + { Cluster.Leave(GetAddress(Myself)); - }, removeRole); + } + }, removeRoles); - RunOn(() => + await RunOnAsync(async () => { - ReportResult(() => + await ReportResult(async () => { - RunOn(() => + await RunOnAsync(async () => { if (shutdown) { - if (Settings.Infolog) + foreach (var role in removeRoles) { - Log.Info("Shutting down [{0}]", removeAddress); + if (Settings.Infolog) + Log.Info("Shutting down [{0}]", GetAddress(role)); + await TestConductor.ExitAsync(role, 0); } - - TestConductor.Exit(removeRole, 0).Wait(); } }, Roles.First()); - - AwaitMembersUp(currentRoles.Length, timeout: RemainingOrDefault); - AwaitAllReachable(); + await AwaitMembersUpAsync(currentRoles.Length, timeout: RemainingOrDefault); + await AwaitAllReachableAsync(); return true; }); }, currentRoles); - RunOn(() => - { - var expectedPath = new RootActorPath(removeAddress) / "user" / "watchee"; - ExpectMsg(t => t.ActorRef.Path == expectedPath); - }, Roles.First()); - - EnterBarrier("watch-verified-" + Step); - - AwaitClusterResult(); - EnterBarrier("remove-one-" + Step); + await AwaitClusterResultAsync(); + await EnterBarrierAsync("remove-several-" + Step); }); - } + } - public void RemoveSeveral(int numberOfNodes, bool shutdown) - { - string FormatNodeLeave() + public async Task PartitionSeveral(int numberOfNodes) + { + await WithinAsync(TimeSpan.FromSeconds(25) + ConvergenceWithin(TimeSpan.FromSeconds(5), NbrUsedRoles - numberOfNodes), + async () => { - 
return shutdown ? "shutdown" : "remove"; - } - - Within(TimeSpan.FromSeconds(25) + ConvergenceWithin(TimeSpan.FromSeconds(5), NbrUsedRoles - numberOfNodes), - () => + var currentRoles = Roles.Take(NbrUsedRoles - numberOfNodes).ToArray(); + var removeRoles = Roles.Skip(currentRoles.Length).Take(numberOfNodes).ToArray(); + var title = $"partition {numberOfNodes} in {NbrUsedRoles} nodes cluster"; + Console.WriteLine(title); + Console.WriteLine("[{0}] are blackholing [{1}]", string.Join(",", currentRoles.Select(x => x.ToString())), string.Join(",", removeRoles.Select(x => x.ToString()))); + await CreateResultAggregatorAsync(title, expectedResults: currentRoles.Length, includeInHistory: true); + + await RunOnAsync(async () => { - var currentRoles = Roles.Take(NbrUsedRoles - numberOfNodes).ToArray(); - var removeRoles = Roles.Skip(currentRoles.Length).Take(numberOfNodes).ToArray(); - var title = $"{FormatNodeLeave()} {numberOfNodes} in {NbrUsedRoles} nodes cluster"; - CreateResultAggregator(title, expectedResults: currentRoles.Length, includeInHistory: true); - - RunOn(() => + foreach (var x in currentRoles) { - if (!shutdown) + foreach (var y in removeRoles) { - Cluster.Leave(GetAddress(Myself)); + await TestConductor.BlackholeAsync(x, y, ThrottleTransportAdapter.Direction.Both); } - }, removeRoles); + } + }, Roles.First()); + await EnterBarrierAsync("partition-several-blackhole"); - RunOn(() => + await RunOnAsync(async () => + { + await ReportResult(async () => { - ReportResult(() => - { - RunOn(() => - { - if (shutdown) - { - foreach (var role in removeRoles) - { - if (Settings.Infolog) - Log.Info("Shutting down [{0}]", GetAddress(role)); - TestConductor.Exit(role, 0).Wait(RemainingOrDefault); - } - } - }, Roles.First()); - AwaitMembersUp(currentRoles.Length, timeout: RemainingOrDefault); - AwaitAllReachable(); - return true; - }); - }, currentRoles); - - AwaitClusterResult(); - EnterBarrier("remove-several-" + Step); - }); - } + var startTime = 
MonotonicClock.GetTicks(); + await AwaitMembersUpAsync(currentRoles.Length, timeout:RemainingOrDefault); + Sys.Log.Info("Removed [{0}] members after [{0} ms]", + removeRoles.Length, TimeSpan.FromTicks(MonotonicClock.GetTicks() - startTime).TotalMilliseconds); + await AwaitAllReachableAsync(); + return true; + }); + }, currentRoles); - public void PartitionSeveral(int numberOfNodes) - { - Within(TimeSpan.FromSeconds(25) + ConvergenceWithin(TimeSpan.FromSeconds(5), NbrUsedRoles - numberOfNodes), - () => + RunOn(() => { - var currentRoles = Roles.Take(NbrUsedRoles - numberOfNodes).ToArray(); - var removeRoles = Roles.Skip(currentRoles.Length).Take(numberOfNodes).ToArray(); - var title = $"partition {numberOfNodes} in {NbrUsedRoles} nodes cluster"; - Console.WriteLine(title); - Console.WriteLine("[{0}] are blackholing [{1}]", string.Join(",", currentRoles.Select(x => x.ToString())), string.Join(",", removeRoles.Select(x => x.ToString()))); - CreateResultAggregator(title, expectedResults: currentRoles.Length, includeInHistory: true); - - RunOn(() => + Sys.ActorOf(Props.Create()); + AwaitAssert(() => { - foreach (var x in currentRoles) - { - foreach (var y in removeRoles) - { - TestConductor.Blackhole(x, y, ThrottleTransportAdapter.Direction.Both).Wait(); - } - } - }, Roles.First()); - EnterBarrier("partition-several-blackhole"); + Cluster.IsTerminated.Should().BeTrue(); + }); + }, removeRoles); + await AwaitClusterResultAsync(); + await EnterBarrierAsync("partition-several-" + Step); + }); + } - RunOn(() => - { - ReportResult(() => - { - var startTime = MonotonicClock.GetTicks(); - AwaitMembersUp(currentRoles.Length, timeout:RemainingOrDefault); - Sys.Log.Info("Removed [{0}] members after [{0} ms]", - removeRoles.Length, TimeSpan.FromTicks(MonotonicClock.GetTicks() - startTime).TotalMilliseconds); - AwaitAllReachable(); - return true; - }); - }, currentRoles); + public T ReportResult(Func thunk) + { + var startTime = MonotonicClock.GetTicks(); + var startStats = 
ClusterView.LatestStats.GossipStats; - RunOn(() => - { - Sys.ActorOf(Props.Create()); - AwaitAssert(() => - { - Cluster.IsTerminated.Should().BeTrue(); - }); - }, removeRoles); - AwaitClusterResult(); - EnterBarrier("partition-several-" + Step); - }); - } + var returnValue = thunk(); - public T ReportResult(Func thunk) + ClusterResultAggregator().OnSuccess(r => { - var startTime = MonotonicClock.GetTicks(); - var startStats = ClusterView.LatestStats.GossipStats; + r.Tell(new ClusterResult(Cluster.SelfAddress, TimeSpan.FromTicks(MonotonicClock.GetTicks() - startTime), LatestGossipStats - startStats)); + }); - var returnValue = thunk(); + return returnValue; + } - ClusterResultAggregator().OnSuccess(r => - { - r.Tell(new ClusterResult(Cluster.SelfAddress, TimeSpan.FromTicks(MonotonicClock.GetTicks() - startTime), LatestGossipStats - startStats)); - }); + public async Task ReportResult(Func> thunk) + { + var startTime = MonotonicClock.GetTicks(); + var startStats = ClusterView.LatestStats.GossipStats; - return returnValue; - } + var returnValue = await thunk(); + + ClusterResultAggregator().OnSuccess(r => + { + r.Tell(new ClusterResult(Cluster.SelfAddress, TimeSpan.FromTicks(MonotonicClock.GetTicks() - startTime), LatestGossipStats - startStats)); + }); - public void ExerciseJoinRemove(string title, TimeSpan duration) + return returnValue; + } + + public async Task ExerciseJoinRemoveAsync(string title, TimeSpan duration) + { + var activeRoles = Roles.Take(Settings.NumberOfNodesJoinRemove).ToArray(); + var loopDuration = TimeSpan.FromSeconds(10) + + ConvergenceWithin(TimeSpan.FromSeconds(4), NbrUsedRoles + activeRoles.Length); + var rounds = (int)Math.Max(1.0d, (duration - loopDuration).TotalMilliseconds / loopDuration.TotalMilliseconds); + var usedRoles = Roles.Take(NbrUsedRoles).ToArray(); + var usedAddresses = usedRoles.Select(GetAddress).ToImmutableHashSet(); + + async Task> Loop(int counter, Option previousAs, + ImmutableHashSet
allPreviousAddresses) { - var activeRoles = Roles.Take(Settings.NumberOfNodesJoinRemove).ToArray(); - var loopDuration = TimeSpan.FromSeconds(10) + - ConvergenceWithin(TimeSpan.FromSeconds(4), NbrUsedRoles + activeRoles.Length); - var rounds = (int)Math.Max(1.0d, (duration - loopDuration).TotalMilliseconds / loopDuration.TotalMilliseconds); - var usedRoles = Roles.Take(NbrUsedRoles).ToArray(); - var usedAddresses = usedRoles.Select(x => GetAddress(x)).ToImmutableHashSet(); - - Option Loop(int counter, Option previousAs, - ImmutableHashSet
allPreviousAddresses) + if (counter > rounds) + return previousAs; + + var t = title + " round " + counter; + RunOn(() => { - if (counter > rounds) return previousAs; + PhiObserver.Value.Tell(Reset.Instance); + StatsObserver.Value.Tell(Reset.Instance); + }, usedRoles); + await CreateResultAggregatorAsync(t, expectedResults:NbrUsedRoles, includeInHistory:true); - var t = title + " round " + counter; - RunOn(() => - { - PhiObserver.Value.Tell(Reset.Instance); - StatsObserver.Value.Tell(Reset.Instance); - }, usedRoles); - CreateResultAggregator(t, expectedResults:NbrUsedRoles, includeInHistory:true); - - var nextAs = Option.None; - var nextAddresses = ImmutableHashSet
.Empty; - Within(loopDuration, () => + var nextAs = Option.None; + var nextAddresses = ImmutableHashSet
.Empty; + await WithinAsync(loopDuration, async () => + { + var (nextAsy, nextAddr) = await ReportResult(async () => { - var (nextAsy, nextAddr) = ReportResult(() => - { - Option nextAs; + Option nextAs; - if (activeRoles.Contains(Myself)) - { - previousAs.OnSuccess(s => - { - Shutdown(s); - }); - - var sys = ActorSystem.Create(Sys.Name, Sys.Settings.Config); - MuteLog(sys); - Akka.Cluster.Cluster.Get(sys).JoinSeedNodes(SeedNodes.Select(x => GetAddress(x))); - nextAs = Option.Create(sys); - } - else + if (activeRoles.Contains(Myself)) + { + previousAs.OnSuccess(s => { - nextAs = previousAs; - } + Shutdown(s); + }); - RunOn(() => - { - AwaitMembersUp(NbrUsedRoles + activeRoles.Length, - canNotBePartOfMemberRing: allPreviousAddresses, - timeout: RemainingOrDefault); - AwaitAllReachable(); - }, usedRoles); + var sys = ActorSystem.Create(Sys.Name, Sys.Settings.Config); + MuteLog(sys); + await Cluster.Get(sys).JoinSeedNodesAsync(SeedNodes.Select(GetAddress)); + nextAs = Option.Create(sys); + } + else + { + nextAs = previousAs; + } - nextAddresses = ClusterView.Members.Select(x => x.Address).ToImmutableHashSet() - .Except(usedAddresses); + await RunOnAsync(async () => + { + await AwaitMembersUpAsync(NbrUsedRoles + activeRoles.Length, + canNotBePartOfMemberRing: allPreviousAddresses, + timeout: RemainingOrDefault); + await AwaitAllReachableAsync(); + }, usedRoles); - RunOn(() => - { - nextAddresses.Count.Should().Be(Settings.NumberOfNodesJoinRemove); - }, usedRoles); + nextAddresses = ClusterView.Members.Select(x => x.Address).ToImmutableHashSet() + .Except(usedAddresses); - return (nextAs, nextAddresses); - }); + RunOn(() => + { + nextAddresses.Count.Should().Be(Settings.NumberOfNodesJoinRemove); + }, usedRoles); - nextAs = nextAsy; - nextAddresses = nextAddr; + return (nextAs, nextAddresses); }); - AwaitClusterResult(); - Step += 1; - return Loop(counter + 1, nextAs, nextAddresses); - } - - Loop(1, Option.None, ImmutableHashSet
.Empty).OnSuccess(aSys => - { - Shutdown(aSys); + nextAs = nextAsy; + nextAddresses = nextAddr; }); - Within(loopDuration, () => - { - RunOn(() => - { - AwaitMembersUp(NbrUsedRoles, timeout: RemainingOrDefault); - AwaitAllReachable(); - PhiObserver.Value.Tell(Reset.Instance); - StatsObserver.Value.Tell(Reset.Instance); - }, usedRoles); - }); - EnterBarrier("join-remove-shutdown-" + Step); + await AwaitClusterResultAsync(); + Step += 1; + return await Loop(counter + 1, nextAs, nextAddresses); } - public void IdleGossip(string title) + (await Loop(1, Option.None, ImmutableHashSet
.Empty)).OnSuccess(aSys => { - CreateResultAggregator(title, expectedResults: NbrUsedRoles, includeInHistory: true); - ReportResult(() => - { - ClusterView.Members.Count.Should().Be(NbrUsedRoles); - Thread.Sleep(Settings.IdleGossipDuration); - ClusterView.Members.Count.Should().Be(NbrUsedRoles); - return true; - }); - AwaitClusterResult(); - } + Shutdown(aSys); + }); - public void IncrementStep() + await WithinAsync(loopDuration, async () => { - Step += 1; - } + await RunOnAsync(async () => + { + await AwaitMembersUpAsync(NbrUsedRoles, timeout: RemainingOrDefault); + await AwaitAllReachableAsync(); + PhiObserver.Value.Tell(Reset.Instance); + StatsObserver.Value.Tell(Reset.Instance); + }, usedRoles); + }); + await EnterBarrierAsync("join-remove-shutdown-" + Step); + } - [MultiNodeFact] - public void Cluster_under_stress() + public async Task IdleGossipAsync(string title) + { + await CreateResultAggregatorAsync(title, expectedResults: NbrUsedRoles, includeInHistory: true); + await ReportResult(async () => { - MustLogSettings(); - IncrementStep(); - MustJoinSeedNodes(); - IncrementStep(); - MustJoinSeedNodesOneByOneToSmallCluster(); - IncrementStep(); - MustJoinSeveralNodesToOneNode(); - IncrementStep(); - MustJoinSeveralNodesToSeedNodes(); - IncrementStep(); - MustJoinNodesOneByOneToLargeCluster(); - IncrementStep(); - MustExerciseJoinRemoveJoinRemove(); - IncrementStep(); - MustGossipWhenIdle(); - IncrementStep(); - MustDownPartitionedNodes(); - IncrementStep(); - MustLeaveNodesOneByOneFromLargeCluster(); - IncrementStep(); - MustShutdownNodesOneByOneFromLargeCluster(); - IncrementStep(); - MustLeaveSeveralNodes(); - IncrementStep(); - MustShutdownSeveralNodes(); - IncrementStep(); - MustShutdownNodesOneByOneFromSmallCluster(); - IncrementStep(); - MustLeaveNodesOneByOneFromSmallCluster(); - IncrementStep(); - MustLogClrInfo(); - } + ClusterView.Members.Count.Should().Be(NbrUsedRoles); + await Task.Delay(Settings.IdleGossipDuration); + 
ClusterView.Members.Count.Should().Be(NbrUsedRoles); + return true; + }); + await AwaitClusterResultAsync(); + } + + public void IncrementStep() + { + Step += 1; + } + + [MultiNodeFact] + public async Task Cluster_under_stress() + { + await MustLogSettings(); + IncrementStep(); + await MustJoinSeedNodesAsync(); + IncrementStep(); + await MustJoinSeedNodesOneByOneToSmallClusterAsync(); + IncrementStep(); + await MustJoinSeveralNodesToOneNodeAsync(); + IncrementStep(); + await MustJoinSeveralNodesToSeedNodesAsync(); + IncrementStep(); + await MustJoinNodesOneByOneToLargeClusterAsync(); + IncrementStep(); + await MustExerciseJoinRemoveJoinRemoveAsync(); + IncrementStep(); + await MustGossipWhenIdleAsync(); + IncrementStep(); + await MustDownPartitionedNodesAsync(); + IncrementStep(); + await MustLeaveNodesOneByOneFromLargeClusterAsync(); + IncrementStep(); + await MustShutdownNodesOneByOneFromLargeClusterAsync(); + IncrementStep(); + await MustLeaveSeveralNodesAsync(); + IncrementStep(); + await MustShutdownSeveralNodesAsync(); + IncrementStep(); + await MustShutdownNodesOneByOneFromSmallClusterAsync(); + IncrementStep(); + await MustLeaveNodesOneByOneFromSmallClusterAsync(); + IncrementStep(); + await MustLogClrInfoAsync(); + } - public void MustLogSettings() + public async Task MustLogSettings() + { + if (Settings.Infolog) { - if (Settings.Infolog) + Log.Info("StressSpec CLR:" + Environment.NewLine + ClrInfo()); + RunOn(() => { - Log.Info("StressSpec CLR:" + Environment.NewLine + ClrInfo()); - RunOn(() => - { - Log.Info("StressSpec settings:" + Environment.NewLine + Settings); - }); - } - EnterBarrier("after-" + Step); + Log.Info("StressSpec settings:" + Environment.NewLine + Settings); + }); } + await EnterBarrierAsync("after-" + Step); + } - public void MustJoinSeedNodes() + public async Task MustJoinSeedNodesAsync() + { + await WithinAsync(TimeSpan.FromSeconds(30), async () => { - Within(TimeSpan.FromSeconds(30), () => - { - var otherNodesJoiningSeedNodes = 
Roles.Skip(Settings.NumberOfSeedNodes) - .Take(Settings.NumberOfNodesJoiningToSeedNodesInitially).ToArray(); - var size = SeedNodes.Count + otherNodesJoiningSeedNodes.Length; + var otherNodesJoiningSeedNodes = Roles.Skip(Settings.NumberOfSeedNodes) + .Take(Settings.NumberOfNodesJoiningToSeedNodesInitially).ToArray(); + var size = SeedNodes.Count + otherNodesJoiningSeedNodes.Length; - CreateResultAggregator("join seed nodes", expectedResults: size, includeInHistory: true); + await CreateResultAggregatorAsync("join seed nodes", expectedResults: size, includeInHistory: true); - RunOn(() => + await RunOnAsync(async () => + { + await ReportResult(async () => { - ReportResult(() => - { - Cluster.JoinSeedNodes(SeedNodes.Select(x => GetAddress(x))); - AwaitMembersUp(size, timeout: RemainingOrDefault); - return true; - }); - }, SeedNodes.AddRange(otherNodesJoiningSeedNodes).ToArray()); + await Cluster.JoinSeedNodesAsync(SeedNodes.Select(GetAddress)); + await AwaitMembersUpAsync(size, timeout: RemainingOrDefault); + return await Task.FromResult(true); + }); + }, SeedNodes.AddRange(otherNodesJoiningSeedNodes).ToArray()); - AwaitClusterResult(); - NbrUsedRoles += size; - EnterBarrier("after-" + Step); - }); - } + await AwaitClusterResultAsync(); + NbrUsedRoles += size; + await EnterBarrierAsync("after-" + Step); + }); + } - public void MustJoinSeedNodesOneByOneToSmallCluster() - { - JoinOneByOne(Settings.NumberOfNodesJoiningOneByOneSmall); - EnterBarrier("after-" + Step); - } + public async Task MustJoinSeedNodesOneByOneToSmallClusterAsync() + { + await JoinOneByOneAsync(Settings.NumberOfNodesJoiningOneByOneSmall); + await EnterBarrierAsync("after-" + Step); + } - public void MustJoinSeveralNodesToOneNode() - { - JoinSeveral(Settings.NumberOfNodesJoiningToOneNode, false); - NbrUsedRoles += Settings.NumberOfNodesJoiningToOneNode; - EnterBarrier("after-" + Step); - } + public async Task MustJoinSeveralNodesToOneNodeAsync() + { + await 
JoinSeveralAsync(Settings.NumberOfNodesJoiningToOneNode, false); + NbrUsedRoles += Settings.NumberOfNodesJoiningToOneNode; + await EnterBarrierAsync("after-" + Step); + } - public void MustJoinSeveralNodesToSeedNodes() + public async Task MustJoinSeveralNodesToSeedNodesAsync() + { + if (Settings.NumberOfNodesJoiningToSeedNodes > 0) { - if (Settings.NumberOfNodesJoiningToSeedNodes > 0) - { - JoinSeveral(Settings.NumberOfNodesJoiningToSeedNodes, true); - NbrUsedRoles += Settings.NumberOfNodesJoiningToSeedNodes; - } - EnterBarrier("after-" + Step); + await JoinSeveralAsync(Settings.NumberOfNodesJoiningToSeedNodes, true); + NbrUsedRoles += Settings.NumberOfNodesJoiningToSeedNodes; } + await EnterBarrierAsync("after-" + Step); + } - public void MustJoinNodesOneByOneToLargeCluster() - { - JoinOneByOne(Settings.NumberOfNodesJoiningOneByOneLarge); - EnterBarrier("after-" + Step); - } + public async Task MustJoinNodesOneByOneToLargeClusterAsync() + { + await JoinOneByOneAsync(Settings.NumberOfNodesJoiningOneByOneLarge); + await EnterBarrierAsync("after-" + Step); + } - public void MustExerciseJoinRemoveJoinRemove() - { - ExerciseJoinRemove("exercise join/remove", Settings.JoinRemoveDuration); - EnterBarrier("after-" + Step); - } + public async Task MustExerciseJoinRemoveJoinRemoveAsync() + { + await ExerciseJoinRemoveAsync("exercise join/remove", Settings.JoinRemoveDuration); + await EnterBarrierAsync("after-" + Step); + } - public void MustGossipWhenIdle() - { - IdleGossip("idle gossip"); - EnterBarrier("after-" + Step); - } + public async Task MustGossipWhenIdleAsync() + { + await IdleGossipAsync("idle gossip"); + await EnterBarrierAsync("after-" + Step); + } - public void MustDownPartitionedNodes() - { - PartitionSeveral(Settings.NumberOfNodesPartition); - NbrUsedRoles -= Settings.NumberOfNodesPartition; - EnterBarrier("after-" + Step); - } + public async Task MustDownPartitionedNodesAsync() + { + await PartitionSeveral(Settings.NumberOfNodesPartition); + NbrUsedRoles -= 
Settings.NumberOfNodesPartition; + await EnterBarrierAsync("after-" + Step); + } - public void MustLeaveNodesOneByOneFromLargeCluster() - { - RemoveOneByOne(Settings.NumberOfNodesLeavingOneByOneLarge, shutdown:false); - EnterBarrier("after-" + Step); - } + public async Task MustLeaveNodesOneByOneFromLargeClusterAsync() + { + await RemoveOneByOne(Settings.NumberOfNodesLeavingOneByOneLarge, shutdown:false); + await EnterBarrierAsync("after-" + Step); + } - public void MustShutdownNodesOneByOneFromLargeCluster() - { - RemoveOneByOne(Settings.NumberOfNodesShutdownOneByOneLarge, shutdown: true); - EnterBarrier("after-" + Step); - } + public async Task MustShutdownNodesOneByOneFromLargeClusterAsync() + { + await RemoveOneByOne(Settings.NumberOfNodesShutdownOneByOneLarge, shutdown: true); + await EnterBarrierAsync("after-" + Step); + } - public void MustLeaveSeveralNodes() - { - RemoveSeveral(Settings.NumberOfNodesLeaving, shutdown: false); - NbrUsedRoles -= Settings.NumberOfNodesLeaving; - EnterBarrier("after-" + Step); - } + public async Task MustLeaveSeveralNodesAsync() + { + await RemoveSeveralAsync(Settings.NumberOfNodesLeaving, shutdown: false); + NbrUsedRoles -= Settings.NumberOfNodesLeaving; + await EnterBarrierAsync("after-" + Step); + } - public void MustShutdownSeveralNodes() - { - RemoveSeveral(Settings.NumberOfNodesShutdown, shutdown: true); - NbrUsedRoles -= Settings.NumberOfNodesShutdown; - EnterBarrier("after-" + Step); - } + public async Task MustShutdownSeveralNodesAsync() + { + await RemoveSeveralAsync(Settings.NumberOfNodesShutdown, shutdown: true); + NbrUsedRoles -= Settings.NumberOfNodesShutdown; + await EnterBarrierAsync("after-" + Step); + } - public void MustShutdownNodesOneByOneFromSmallCluster() - { - RemoveOneByOne(Settings.NumberOfNodesShutdownOneByOneSmall, true); - EnterBarrier("after-" + Step); - } + public async Task MustShutdownNodesOneByOneFromSmallClusterAsync() + { + await RemoveOneByOne(Settings.NumberOfNodesShutdownOneByOneSmall, 
true); + await EnterBarrierAsync("after-" + Step); + } - public void MustLeaveNodesOneByOneFromSmallCluster() - { - RemoveOneByOne(Settings.NumberOfNodesLeavingOneByOneSmall, false); - EnterBarrier("after-" + Step); - } + public async Task MustLeaveNodesOneByOneFromSmallClusterAsync() + { + await RemoveOneByOne(Settings.NumberOfNodesLeavingOneByOneSmall, false); + await EnterBarrierAsync("after-" + Step); + } - public void MustLogClrInfo() + public async Task MustLogClrInfoAsync() + { + if (Settings.Infolog) { - if (Settings.Infolog) - { - Log.Info("StressSpec CLR: " + Environment.NewLine + "{0}", ClrInfo()); - } - EnterBarrier("after-" + Step); + Log.Info("StressSpec CLR: " + Environment.NewLine + "{0}", ClrInfo()); } + await EnterBarrierAsync("after-" + Step); } -} +} \ No newline at end of file diff --git a/src/core/Akka.Remote.TestKit/Conductor.cs b/src/core/Akka.Remote.TestKit/Conductor.cs index 78abecb6569..abdf2c33ae6 100644 --- a/src/core/Akka.Remote.TestKit/Conductor.cs +++ b/src/core/Akka.Remote.TestKit/Conductor.cs @@ -9,6 +9,7 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.Net; +using System.Threading; using System.Threading.Tasks; using Akka.Actor; using Akka.Event; @@ -61,14 +62,40 @@ public IActorRef Controller /// /// /// - public async Task StartController(int participants, RoleName name, IPEndPoint controllerPort) + public Task StartController(int participants, RoleName name, IPEndPoint controllerPort) + { + return StartControllerAsync(participants, name, controllerPort, CancellationToken.None); + } + + /// + /// Start the , which in turn will + /// bind to a TCP port as specified in the `akka.testconductor.port` config + /// property, where 0 denotes automatic allocation. Since the latter is + /// actually preferred, a `Future[Int]` is returned which will be completed + /// with the port number actually chosen, so that this can then be communicated + /// to the players for their proper start-up. 
+ /// + /// This method also invokes Player.startClient, + /// since it is expected that the conductor participates in barriers for + /// overall coordination. The returned Future will only be completed once the + /// client’s start-up finishes, which in fact waits for all other players to + /// connect. + /// + /// participants gives the number of participants which shall connect + /// before any of their startClient() operations complete + /// + /// + /// + /// + /// + public async Task StartControllerAsync(int participants, RoleName name, IPEndPoint controllerPort, CancellationToken cancellationToken = default) { if(_controller != null) throw new IllegalStateException("TestConductorServer was already started"); _controller = _system.ActorOf(Props.Create(() => new Controller(participants, controllerPort)), - "controller"); + "controller"); - var node = await _controller.Ask(TestKit.Controller.GetSockAddr.Instance, Settings.QueryTimeout).ConfigureAwait(false); - await StartClient(name, node).ConfigureAwait(false); + var node = await _controller.Ask(TestKit.Controller.GetSockAddr.Instance, Settings.QueryTimeout, cancellationToken); + await StartClient(name, node); return node; } @@ -96,9 +123,18 @@ public async Task StartController(int participants, RoleName name, I /// public Task Throttle(RoleName node, RoleName target, ThrottleTransportAdapter.Direction direction, float rateMBit) + { + return ThrottleAsync(node, target, direction, rateMBit, CancellationToken.None); + } + + /// + /// Async version of Throttle with cancellation token support. 
+ /// + public Task ThrottleAsync(RoleName node, RoleName target, ThrottleTransportAdapter.Direction direction, + float rateMBit, CancellationToken cancellationToken = default) { RequireTestConductorTransport(); - return Controller.Ask(new Throttle(node, target, direction, rateMBit), Settings.QueryTimeout); + return Controller.Ask(new Throttle(node, target, direction, rateMBit), Settings.QueryTimeout, cancellationToken); } /// @@ -117,7 +153,28 @@ public Task Throttle(RoleName node, RoleName target, ThrottleTransportAdap /// public Task Blackhole(RoleName node, RoleName target, ThrottleTransportAdapter.Direction direction) { - return Throttle(node, target, direction, 0f); + return BlackholeAsync(node, target, direction, CancellationToken.None); + } + + /// + /// Async version of Blackhole with cancellation token support. + /// Switch the helios pipeline of the remote support into blackhole mode for + /// sending and/or receiving: it will just drop all messages right before + /// submitting them to the Socket or right after receiving them from the + /// Socket. + /// + /// ====Note==== + /// To use this feature you must activate the failure injector and throttler + /// transport adapters by specifying `testTransport(on = true)` in your MultiNodeConfig. 
+ /// + /// is the symbolic name of the node which is to be affected + /// is the symbolic name of the other node to which connectivity shall be impeded + /// can be either `Direction.Send`, `Direction.Receive` or `Direction.Both` + /// Cancellation token + /// Task indicating completion + public Task BlackholeAsync(RoleName node, RoleName target, ThrottleTransportAdapter.Direction direction, CancellationToken cancellationToken = default) + { + return ThrottleAsync(node, target, direction, 0f, cancellationToken); } private void RequireTestConductorTransport() @@ -142,7 +199,26 @@ private void RequireTestConductorTransport() /// public Task PassThrough(RoleName node, RoleName target, ThrottleTransportAdapter.Direction direction) { - return Throttle(node, target, direction, -1f); + return PassThroughAsync(node, target, direction, CancellationToken.None); + } + + /// + /// Async version of PassThrough with cancellation token support. + /// Switch the Helios pipeline of the remote support into pass through mode for + /// sending and/or receiving. + /// + /// ====Note==== + /// To use this feature you must activate the failure injector and throttler + /// transport adapters by specifying `testTransport(on = true)` in your MultiNodeConfig. 
+ /// + /// is the symbolic name of the node which is to be affected + /// is the symbolic name of the other node to which connectivity shall be impeded + /// can be either `Direction.Send`, `Direction.Receive` or `Direction.Both` + /// Cancellation token + /// Task indicating completion + public Task PassThroughAsync(RoleName node, RoleName target, ThrottleTransportAdapter.Direction direction, CancellationToken cancellationToken = default) + { + return ThrottleAsync(node, target, direction, -1f, cancellationToken); } /// @@ -155,7 +231,21 @@ public Task PassThrough(RoleName node, RoleName target, ThrottleTransportA /// public Task Disconnect(RoleName node, RoleName target) { - return Controller.Ask(new Disconnect(node, target, false), Settings.QueryTimeout); + return DisconnectAsync(node, target, CancellationToken.None); + } + + /// + /// Tell the remote support to shut down the connection to the given remote + /// peer. It works regardless of whether the recipient was initiator or + /// responder. + /// + /// is the symbolic name of the node which is to be affected + /// is the symbolic name of the other node to which connectivity shall be impeded + /// Cancellation token + /// + public Task DisconnectAsync(RoleName node, RoleName target, CancellationToken cancellationToken = default) + { + return Controller.Ask(new Disconnect(node, target, false), Settings.QueryTimeout, cancellationToken); } /// @@ -168,7 +258,21 @@ public Task Disconnect(RoleName node, RoleName target) /// public Task Abort(RoleName node, RoleName target) { - return Controller.Ask(new Disconnect(node, target, true), Settings.QueryTimeout); + return AbortAsync(node, target, CancellationToken.None); + } + + /// + /// Tell the remote support to TCP_RESET the connection to the given remote + /// peer. It works regardless of whether the recipient was initiator or + /// responder. 
+ /// + /// is the symbolic name of the node which is to be affected + /// is the symbolic name of the other node to which connectivity shall be impeded + /// Cancellation token + /// + public Task AbortAsync(RoleName node, RoleName target, CancellationToken cancellationToken = default) + { + return Controller.Ask(new Disconnect(node, target, true), Settings.QueryTimeout, cancellationToken); } /// @@ -181,16 +285,33 @@ public Task Abort(RoleName node, RoleName target) /// TBD public Task Exit(RoleName node, int exitValue) { - // the recover is needed to handle ClientDisconnectedException exception, - // which is normal during shutdown - return Controller.Ask(new Terminate(node, new Right(exitValue)), Settings.QueryTimeout).ContinueWith(t => - { - if(t.Result is Done) return Done.Instance; - var failure = t.Result as FSMBase.Failure; - if (failure != null && failure.Cause is Controller.ClientDisconnectedException) return Done.Instance; + // Use the async version with no cancellation token for consistency + return ExitAsync(node, exitValue, CancellationToken.None); + } - throw new InvalidOperationException($"Expected Done but received {t.Result}"); - }); + /// + /// Async version of Exit with cancellation token support. + /// Tell the actor system at the remote node to shut itself down. The node will also be + /// removed, so that the remaining nodes may still pass subsequent barriers. 
+ /// + /// is the symbolic name of the node which is to be affected + /// is the return code which shall be given to System.exit + /// Cancellation token + /// Task indicating completion + public async Task ExitAsync(RoleName node, int exitValue, CancellationToken cancellationToken = default) + { + try + { + var result = await Controller.Ask(new Terminate(node, new Right(exitValue)), Settings.QueryTimeout, cancellationToken); + if (result is Done) return Done.Instance; + if (result is FSMBase.Failure failure && failure.Cause is Controller.ClientDisconnectedException) + return Done.Instance; + throw new InvalidOperationException($"Expected Done but received {result}"); + } + catch (TaskCanceledException) + { + throw new TimeoutException($"ExitAsync operation was cancelled for node {node}"); + } } /// @@ -201,19 +322,32 @@ public Task Exit(RoleName node, int exitValue) /// is the symbolic name of the node which is to be affected /// TBD /// TBD - /// TBD + /// Task indicating completion public Task Shutdown(RoleName node, bool abort = false) + { + // Use the async version with no cancellation token for consistency + return ShutdownAsync(node, abort, CancellationToken.None); + } + + /// + /// Tell the actor system at the remote node to shut itself down without + /// awaiting termination of remote-deployed children. The node will also be + /// removed, so that the remaining nodes may still pass subsequent barriers. 
+ /// + /// is the symbolic name of the node which is to be affected + /// TBD + /// Cancellation token + /// Task indicating completion + public async Task ShutdownAsync(RoleName node, bool abort = false, CancellationToken cancellationToken = default) { // the recover is needed to handle ClientDisconnectedException exception, // which is normal during shutdown - return Controller.Ask(new Terminate(node, new Left(abort)), Settings.QueryTimeout).ContinueWith(t => + var result = await Controller.Ask(new Terminate(node, new Left(abort)), Settings.QueryTimeout, cancellationToken); + return result switch { - if (t.Result is Done) return Done.Instance; - var failure = t.Result as FSMBase.Failure; - if (failure != null && failure.Cause is Controller.ClientDisconnectedException) return Done.Instance; - - throw new InvalidOperationException($"Expected Done but received {t.Result}"); - }); + Done or FSMBase.Failure { Cause: TestKit.Controller.ClientDisconnectedException } => Done.Instance, + _ => throw new InvalidOperationException($"Expected Done but received {result}") + }; } /// @@ -221,7 +355,17 @@ public Task Shutdown(RoleName node, bool abort = false) /// public Task> GetNodes() { - return Controller.Ask>(TestKit.Controller.GetNodes.Instance, Settings.QueryTimeout); + // Use the async version with no cancellation token for consistency + return GetNodesAsync(CancellationToken.None); + } + + /// + /// Async version of GetNodes with cancellation token support. + /// Obtain the list of remote host names currently registered. 
+ /// + public Task> GetNodesAsync(CancellationToken cancellationToken = default) + { + return Controller.Ask>(TestKit.Controller.GetNodes.Instance, Settings.QueryTimeout, cancellationToken); } /// @@ -234,7 +378,23 @@ public Task> GetNodes() /// public Task RemoveNode(RoleName node) { - return Controller.Ask(new Remove(node), Settings.QueryTimeout); + // Use the async version with no cancellation token for consistency + return RemoveNodeAsync(node, CancellationToken.None); + } + + /// + /// Async version of RemoveNode with cancellation token support. + /// Remove a remote host from the list, so that the remaining nodes may still + /// pass subsequent barriers. This must be done before the client connection + /// breaks down in order to affect an "orderly" removal (i.e. without failing + /// present and future barriers). + /// + /// is the symbolic name of the node which is to be removed + /// Cancellation token + /// Task indicating completion + public Task RemoveNodeAsync(RoleName node, CancellationToken cancellationToken = default) + { + return Controller.Ask(new Remove(node), Settings.QueryTimeout, cancellationToken); } } diff --git a/src/core/Akka.Remote.TestKit/MultiNodeSpec.cs b/src/core/Akka.Remote.TestKit/MultiNodeSpec.cs index 4fcefc77149..fe4adebc3b5 100644 --- a/src/core/Akka.Remote.TestKit/MultiNodeSpec.cs +++ b/src/core/Akka.Remote.TestKit/MultiNodeSpec.cs @@ -15,6 +15,7 @@ using System.Reflection; using System.Runtime.CompilerServices; using System.Text; +using System.Threading; using System.Threading.Tasks; using Akka.Actor; using Akka.Actor.Setup; @@ -25,49 +26,49 @@ using Akka.TestKit.Xunit2; using Akka.Util.Internal; -namespace Akka.Remote.TestKit +namespace Akka.Remote.TestKit; + +/// +/// Configure the role names and participants of the test, including configuration settings +/// +public abstract class MultiNodeConfig { + // allows us to avoid NullReferenceExceptions if we make this empty rather than null + // so that way if a MultiNodeConfig 
doesn't explicitly set CommonConfig to some value + // it will remain safe by default + private Config _commonConf = ConfigurationFactory.Empty; + + private ImmutableDictionary _nodeConf = ImmutableDictionary.Create(); + private ImmutableList _roles = ImmutableList.Create(); + private ImmutableDictionary> _deployments = ImmutableDictionary.Create>(); + private ImmutableList _allDeploy = ImmutableList.Create(); + private bool _testTransport = false; + /// - /// Configure the role names and participants of the test, including configuration settings + /// Register a common base config for all test participants, if so desired. /// - public abstract class MultiNodeConfig - { - // allows us to avoid NullReferenceExceptions if we make this empty rather than null - // so that way if a MultiNodeConfig doesn't explicitly set CommonConfig to some value - // it will remain safe by defaut - Config _commonConf = ConfigurationFactory.Empty; - - ImmutableDictionary _nodeConf = ImmutableDictionary.Create(); - ImmutableList _roles = ImmutableList.Create(); - ImmutableDictionary> _deployments = ImmutableDictionary.Create>(); - ImmutableList _allDeploy = ImmutableList.Create(); - bool _testTransport = false; - - /// - /// Register a common base config for all test participants, if so desired. - /// - public Config CommonConfig - { - set { _commonConf = value; } - } + public Config CommonConfig + { + set { _commonConf = value; } + } - /// - /// Register a config override for a specific participant. - /// - public void NodeConfig(IEnumerable roles, IEnumerable configs) - { - var c = configs.Aggregate((a, b) => a.WithFallback(b)); - _nodeConf = _nodeConf.AddRange(roles.Select(r => new KeyValuePair(r, c))); - } + /// + /// Register a config override for a specific participant. 
+ /// + public void NodeConfig(IEnumerable roles, IEnumerable configs) + { + var c = configs.Aggregate((a, b) => a.WithFallback(b)); + _nodeConf = _nodeConf.AddRange(roles.Select(r => new KeyValuePair(r, c))); + } - /// - /// Include for verbose debug logging - /// - /// when `true` debug Config is returned, otherwise config with info logging - public Config DebugConfig(bool on) - { - if (on) - return ConfigurationFactory.ParseString(@" + /// + /// Include for verbose debug logging + /// + /// when `true` debug Config is returned, otherwise config with info logging + public Config DebugConfig(bool on) + { + if (on) + return ConfigurationFactory.ParseString(@" akka.loglevel = DEBUG akka.remote { log-received-messages = on @@ -80,288 +81,282 @@ public Config DebugConfig(bool on) akka.remote.log-remote-lifecycle-events = on akka.log-dead-letters = on "); - return ConfigurationFactory.Empty; - } - - public RoleName Role(string name) - { - if (_roles.Exists(r => r.Name == name)) throw new ArgumentException("non-unique role name " + name); - var roleName = new RoleName(name); - _roles = _roles.Add(roleName); - return roleName; - } + return ConfigurationFactory.Empty; + } - public void DeployOn(RoleName role, string deployment) - { - _deployments.TryGetValue(role, out var roleDeployments); - _deployments = _deployments.SetItem(role, - roleDeployments == null ? ImmutableList.Create(deployment) : roleDeployments.Add(deployment)); - } + public RoleName Role(string name) + { + if (_roles.Exists(r => r.Name == name)) throw new ArgumentException("non-unique role name " + name); + var roleName = new RoleName(name); + _roles = _roles.Add(roleName); + return roleName; + } - public void DeployOnAll(string deployment) - { - _allDeploy = _allDeploy.Add(deployment); - } + public void DeployOn(RoleName role, string deployment) + { + _deployments.TryGetValue(role, out var roleDeployments); + _deployments = _deployments.SetItem(role, + roleDeployments == null ? 
ImmutableList.Create(deployment) : roleDeployments.Add(deployment)); + } - /// - /// To be able to use `blackhole`, `passThrough`, and `throttle` you must - /// activate the failure injector and throttler transport adapters by - /// specifying `testTransport(on = true)` in your MultiNodeConfig. - /// - public bool TestTransport - { - set { _testTransport = value; } - } + public void DeployOnAll(string deployment) + { + _allDeploy = _allDeploy.Add(deployment); + } - readonly Lazy _myself; + /// + /// To be able to use `blackhole`, `passThrough`, and `throttle` you must + /// activate the failure injector and throttler transport adapters by + /// specifying `testTransport(on = true)` in your MultiNodeConfig. + /// + public bool TestTransport + { + set { _testTransport = value; } + } - protected MultiNodeConfig() - { - var roleName = CommandLine.GetPropertyOrDefault("multinode.role", null); + private readonly Lazy _myself; - if (String.IsNullOrEmpty(roleName)) - { - _myself = new Lazy(() => - { - if (MultiNodeSpec.SelfIndex > _roles.Count) throw new ArgumentException("not enough roles declared for this test"); - return _roles[MultiNodeSpec.SelfIndex]; - }); - } - else - { - _myself = new Lazy(() => - { - var myself = _roles.FirstOrDefault(r => r.Name.Equals(roleName, StringComparison.OrdinalIgnoreCase)); - if (myself == default(RoleName)) throw new ArgumentException($"cannot find {roleName} among configured roles"); - return myself; - }); - } - } + protected MultiNodeConfig() + { + var roleName = CommandLine.GetPropertyOrDefault("multinode.role", null); - public RoleName Myself + if (string.IsNullOrEmpty(roleName)) { - get { return _myself.Value; } + _myself = new Lazy(() => + { + if (MultiNodeSpec.SelfIndex > _roles.Count) throw new ArgumentException("not enough roles declared for this test"); + return _roles[MultiNodeSpec.SelfIndex]; + }); } - - internal Config Config + else { - get + _myself = new Lazy(() => { - var transportConfig = _testTransport ? 
- ConfigurationFactory.ParseString("akka.remote.dot-netty.tcp.applied-adapters = [trttl, gremlin]") - : ConfigurationFactory.Empty; - - var builder = ImmutableList.CreateBuilder(); - if (_nodeConf.TryGetValue(Myself, out var nodeConfig)) - builder.Add(nodeConfig); - builder.Add(_commonConf); - builder.Add(transportConfig); - builder.Add(MultiNodeSpec.NodeConfig); - builder.Add(MultiNodeSpec.BaseConfig); - - return builder.ToImmutable().Aggregate((a, b) => a.WithFallback(b)); - } + var myself = _roles.FirstOrDefault(r => r.Name.Equals(roleName, StringComparison.OrdinalIgnoreCase)); + if (myself is null) throw new ArgumentException($"cannot find {roleName} among configured roles"); + return myself; + }); } + } - internal ImmutableList Deployments(RoleName node) - { - _deployments.TryGetValue(node, out var deployments); - return deployments == null ? _allDeploy : deployments.AddRange(_allDeploy); - } + public RoleName Myself => _myself.Value; - public ImmutableList Roles + internal Config Config + { + get { - get { return _roles; } + var transportConfig = _testTransport ? + ConfigurationFactory.ParseString("akka.remote.dot-netty.tcp.applied-adapters = [trttl, gremlin]") + : ConfigurationFactory.Empty; + + var builder = ImmutableList.CreateBuilder(); + if (_nodeConf.TryGetValue(Myself, out var nodeConfig)) + builder.Add(nodeConfig); + builder.Add(_commonConf); + builder.Add(transportConfig); + builder.Add(MultiNodeSpec.NodeConfig); + builder.Add(MultiNodeSpec.BaseConfig); + + return builder.ToImmutable().Aggregate((a, b) => a.WithFallback(b)); } } - //TODO: Applicable? - /// - /// Note: To be able to run tests with everything ignored or excluded by tags - /// you must not use `testconductor`, or helper methods that use `testconductor`, - /// from the constructor of your test class. Otherwise the controller node might - /// be shutdown before other nodes have completed and you will see errors like: - /// `AskTimeoutException: sending to terminated ref breaks promises`. 
Using lazy - /// val is fine. - /// - public abstract class MultiNodeSpec : TestKitBase, IMultiNodeSpecCallbacks, IDisposable + internal ImmutableList Deployments(RoleName node) { - //TODO: Sort out references to Java classes in + _deployments.TryGetValue(node, out var deployments); + return deployments == null ? _allDeploy : deployments.AddRange(_allDeploy); + } + + public ImmutableList Roles => _roles; +} + +//TODO: Applicable? +/// +/// Note: To be able to run tests with everything ignored or excluded by tags +/// you must not use `testconductor`, or helper methods that use `testconductor`, +/// from the constructor of your test class. Otherwise the controller node might +/// be shutdown before other nodes have completed and you will see errors like: +/// `AskTimeoutException: sending to terminated ref breaks promises`. Using lazy +/// val is fine. +/// +public abstract class MultiNodeSpec : TestKitBase, IMultiNodeSpecCallbacks, IDisposable +{ + //TODO: Sort out references to Java classes in - /// - /// Marker used to indicate that has not been set yet. - /// - private const int MaxNodesUnset = -1; - private static int _maxNodes = MaxNodesUnset; + /// + /// Marker used to indicate that has not been set yet. + /// + private const int MaxNodesUnset = -1; + private static int _maxNodes = MaxNodesUnset; - /// - /// Number of nodes node taking part in this test. - /// -Dmultinode.max-nodes=4 - /// - public static int MaxNodes + /// + /// Number of nodes taking part in this test. 
+ /// -Dmultinode.max-nodes=4 + /// + public static int MaxNodes + { + get { - get + if (_maxNodes == MaxNodesUnset) { - if (_maxNodes == MaxNodesUnset) - { - _maxNodes = CommandLine.GetInt32("multinode.max-nodes"); - } - - if (_maxNodes <= 0) throw new InvalidOperationException("multinode.max-nodes must be greater than 0"); - return _maxNodes; + _maxNodes = CommandLine.GetInt32("multinode.max-nodes"); } + + if (_maxNodes <= 0) throw new InvalidOperationException("multinode.max-nodes must be greater than 0"); + return _maxNodes; } + } + + private static string _multiNodeHost; - private static string _multiNodeHost; - - /// - /// Name (or IP address; must be resolvable) - /// of the host this node is running on - /// - /// -Dmultinode.host=host.example.com - /// - /// InetAddress.getLocalHost.getHostAddress is used if empty or "localhost" - /// is defined as system property "multinode.host". - /// - public static string SelfName + /// + /// Name (or IP address; must be resolvable) + /// of the host this node is running on + /// + /// -Dmultinode.host=host.example.com + /// + /// InetAddress.getLocalHost.getHostAddress is used if empty or "localhost" + /// is defined as system property "multinode.host". + /// + public static string SelfName + { + get { - get + if (string.IsNullOrEmpty(_multiNodeHost)) { - if (string.IsNullOrEmpty(_multiNodeHost)) - { - _multiNodeHost = CommandLine.GetProperty("multinode.host"); - } - - //Run this assertion every time. Consistency is more important than performance. - if (string.IsNullOrEmpty(_multiNodeHost)) throw new InvalidOperationException("multinode.host must not be empty"); - return _multiNodeHost; + _multiNodeHost = CommandLine.GetProperty("multinode.host"); } + + //Run this assertion every time. Consistency is more important than performance. 
+ if (string.IsNullOrEmpty(_multiNodeHost)) throw new InvalidOperationException("multinode.host must not be empty"); + return _multiNodeHost; } + } - /// - /// Marker used to indicate what the "not been set" value of is. - /// - private const int SelfPortUnsetValue = -1; - private static int _selfPort = SelfPortUnsetValue; + /// + /// Marker used to indicate what the "not been set" value of is. + /// + private const int SelfPortUnsetValue = -1; + private static int _selfPort = SelfPortUnsetValue; - /// - /// Port number of this node. Defaults to 0 which means a random port. - /// - /// -Dmultinode.port=0 - /// - public static int SelfPort + /// + /// Port number of this node. Defaults to 0 which means a random port. + /// + /// -Dmultinode.port=0 + /// + public static int SelfPort + { + get { - get + if (_selfPort == SelfPortUnsetValue) //unset { - if (_selfPort == SelfPortUnsetValue) //unset - { - var selfPortStr = CommandLine.GetProperty("multinode.port"); - _selfPort = string.IsNullOrEmpty(selfPortStr) ? 0 : Int32.Parse(selfPortStr); - } - - if (!(_selfPort >= 0 && _selfPort < 65535)) throw new InvalidOperationException("multinode.port is out of bounds: " + _selfPort); - return _selfPort; + var selfPortStr = CommandLine.GetProperty("multinode.port"); + _selfPort = string.IsNullOrEmpty(selfPortStr) ? 0 : Int32.Parse(selfPortStr); } + + if (!(_selfPort >= 0 && _selfPort < 65535)) throw new InvalidOperationException("multinode.port is out of bounds: " + _selfPort); + return _selfPort; } + } - private static string _serverName; - /// - /// Name (or IP address; must be resolvable using InetAddress.getByName) - /// of the host that the server node is running on. - /// - /// -Dmultinode.server-host=server.example.com - /// - public static string ServerName + private static string _serverName; + /// + /// Name (or IP address; must be resolvable using InetAddress.getByName) + /// of the host that the server node is running on. 
+ /// + /// -Dmultinode.server-host=server.example.com + /// + public static string ServerName + { + get { - get + if (string.IsNullOrEmpty(_serverName)) { - if (string.IsNullOrEmpty(_serverName)) - { - _serverName = CommandLine.GetProperty("multinode.server-host"); - } - if (string.IsNullOrEmpty(_serverName)) throw new InvalidOperationException("multinode.server-host must not be empty"); - return _serverName; + _serverName = CommandLine.GetProperty("multinode.server-host"); } + if (string.IsNullOrEmpty(_serverName)) throw new InvalidOperationException("multinode.server-host must not be empty"); + return _serverName; } + } - /// - /// Marker used to indicate what the "not been set" value of is. - /// - private const int ServerPortUnsetValue = -1; + /// + /// Marker used to indicate what the "not been set" value of is. + /// + private const int ServerPortUnsetValue = -1; - /// - /// Default value for - /// - private const int ServerPortDefault = 47110; + /// + /// Default value for + /// + private const int ServerPortDefault = 47110; - private static int _serverPort = ServerPortUnsetValue; + private static int _serverPort = ServerPortUnsetValue; - /// - /// Port number of the node that's running the server system. Defaults to 4711. - /// - /// -Dmultinode.server-port=4711 - /// - public static int ServerPort + /// + /// Port number of the node that's running the server system. Defaults to 47110. + /// + /// -Dmultinode.server-port=4711 + /// + public static int ServerPort + { + get { - get + if (_serverPort == ServerPortUnsetValue) { - if (_serverPort == ServerPortUnsetValue) - { - var serverPortStr = CommandLine.GetProperty("multinode.server-port"); - _serverPort = string.IsNullOrEmpty(serverPortStr) ? 
ServerPortDefault : Int32.Parse(serverPortStr); - } - - if (!(_serverPort > 0 && _serverPort < 65535)) throw new InvalidOperationException("multinode.server-port is out of bounds: " + _serverPort); - return _serverPort; + var serverPortStr = CommandLine.GetProperty("multinode.server-port"); + _serverPort = string.IsNullOrEmpty(serverPortStr) ? ServerPortDefault : Int32.Parse(serverPortStr); } + + if (!(_serverPort > 0 && _serverPort < 65535)) throw new InvalidOperationException("multinode.server-port is out of bounds: " + _serverPort); + return _serverPort; } + } - /// - /// Marker value used to indicate that has not been set yet. - /// - private const int SelfIndexUnset = -1; + /// + /// Marker value used to indicate that has not been set yet. + /// + private const int SelfIndexUnset = -1; - private static int _selfIndex = SelfIndexUnset; + private static int _selfIndex = SelfIndexUnset; - /// - /// Index of this node in the roles sequence. The TestConductor - /// is started in "controller" mode on selfIndex 0, i.e. there you can inject - /// failures and shutdown other nodes etc. - /// - public static int SelfIndex + /// + /// Index of this node in the roles sequence. The TestConductor + /// is started in "controller" mode on selfIndex 0, i.e. there you can inject + /// failures and shutdown other nodes etc. 
+ /// + public static int SelfIndex + { + get { - get + if (_selfIndex == SelfIndexUnset) { - if (_selfIndex == SelfIndexUnset) - { - _selfIndex = CommandLine.GetInt32("multinode.index"); - } - - if (!(_selfIndex >= 0 && _selfIndex < MaxNodes)) throw new InvalidOperationException("multinode.index is out of bounds: " + _selfIndex); - return _selfIndex; + _selfIndex = CommandLine.GetInt32("multinode.index"); } + + if (!(_selfIndex >= 0 && _selfIndex < MaxNodes)) throw new InvalidOperationException("multinode.index is out of bounds: " + _selfIndex); + return _selfIndex; } + } - public static Config NodeConfig + public static Config NodeConfig + { + get { - get - { - const string config = @" + const string config = @" akka.actor.provider = ""Akka.Remote.RemoteActorRefProvider, Akka.Remote"" akka.remote.dot-netty.tcp.hostname = ""{0}"" akka.remote.dot-netty.tcp.port = {1}"; - return ConfigurationFactory.ParseString(String.Format(config, SelfName, SelfPort)); - } + return ConfigurationFactory.ParseString(String.Format(config, SelfName, SelfPort)); } + } - public static Config BaseConfig + public static Config BaseConfig + { + get { - get - { - return ConfigurationFactory.ParseString( - @"akka { + return ConfigurationFactory.ParseString( + @"akka { loglevel = ""WARNING"" stdout-loglevel = ""WARNING"" coordinated-shutdown.terminate-actor-system = off @@ -381,351 +376,384 @@ public static Config BaseConfig } cluster.downing-provider-class = """" #disable SBR by default }").WithFallback(TestKitBase.DefaultConfig); - } } + } - private readonly RoleName _myself; - public RoleName Myself { get { return _myself; } } - private readonly ILoggingAdapter _log; - private bool _isDisposed; //Automatically initialized to false; - private readonly ImmutableList _roles; - private readonly Func> _deployments; - private readonly ImmutableDictionary _replacements; - private readonly Address _myAddress; - - protected MultiNodeSpec(MultiNodeConfig config, Type type) : - this(config.Myself, 
ActorSystem.Create(type.Name, config.Config), config.Roles, config.Deployments) - { - } + public RoleName Myself { get; } - protected MultiNodeSpec( - RoleName myself, - ActorSystem system, - ImmutableList roles, - Func> deployments) - : this(myself, system, null, roles, deployments) - { - } + private readonly ILoggingAdapter _log; + private bool _isDisposed; //Automatically initialized to false; + private readonly Func> _deployments; + private readonly ImmutableDictionary _replacements; + private readonly Address _myAddress; - protected MultiNodeSpec( - RoleName myself, - ActorSystemSetup setup, - ImmutableList roles, - Func> deployments) - : this(myself, null, setup, roles, deployments) - { - } + protected MultiNodeSpec(MultiNodeConfig config, Type type) : + this(config.Myself, ActorSystem.Create(type.Name, config.Config), config.Roles, config.Deployments) + { + } - private MultiNodeSpec( - RoleName myself, - ActorSystem system, - ActorSystemSetup setup, - ImmutableList roles, - Func> deployments) - : base(new XunitAssertions(), system, setup, null, null) - { - _myself = myself; - _log = Logging.GetLogger(Sys, this); - _roles = roles; - _deployments = deployments; + protected MultiNodeSpec( + RoleName myself, + ActorSystem system, + ImmutableList roles, + Func> deployments) + : this(myself, system, null, roles, deployments) + { + } - var node = new IPEndPoint(Dns.GetHostAddresses(ServerName)[0], ServerPort); - _controllerAddr = node; + protected MultiNodeSpec( + RoleName myself, + ActorSystemSetup setup, + ImmutableList roles, + Func> deployments) + : this(myself, null, setup, roles, deployments) + { + } - AttachConductor(new TestConductor(Sys)); + private MultiNodeSpec( + RoleName myself, + ActorSystem system, + ActorSystemSetup setup, + ImmutableList roles, + Func> deployments) + : base(new XunitAssertions(), system, setup, null, null) + { + Myself = myself; + _log = Logging.GetLogger(Sys, this); + Roles = roles; + _deployments = deployments; - _replacements = 
_roles.ToImmutableDictionary(r => r, r => new Replacement("@" + r.Name + "@", r, this)); + var node = new IPEndPoint(Dns.GetHostAddresses(ServerName)[0], ServerPort); + _controllerAddr = node; - InjectDeployments(Sys, myself); + AttachConductor(new TestConductor(Sys)); - _myAddress = Sys.AsInstanceOf().Provider.DefaultAddress; + _replacements = Roles.ToImmutableDictionary(r => r, r => new Replacement("@" + r.Name + "@", r, this)); - Log.Info("Role [{0}] started with address [{1}]", myself.Name, _myAddress); - MultiNodeSpecBeforeAll(); - } + InjectDeployments(Sys, myself); - public void MultiNodeSpecBeforeAll() - { - AtStartup(); - } + _myAddress = Sys.AsInstanceOf().Provider.DefaultAddress; - public void MultiNodeSpecAfterAll() + Log.Info("Role [{0}] started with address [{1}]", myself.Name, _myAddress); + MultiNodeSpecBeforeAll(); + } + + public void MultiNodeSpecBeforeAll() + { + AtStartup(); + } + + public void MultiNodeSpecAfterAll() + { + // wait for all nodes to remove themselves before we shut the conductor down + if (SelfIndex == 0) { - // wait for all nodes to remove themselves before we shut the conductor down - if (SelfIndex == 0) - { - TestConductor.RemoveNode(_myself); - Within(TestConductor.Settings.BarrierTimeout, () => - AwaitCondition(() => TestConductor.GetNodes().Result.All(n => n.Equals(_myself)))); + TestConductor.RemoveNode(Myself); + Within(TestConductor.Settings.BarrierTimeout, () => + AwaitCondition(() => TestConductor.GetNodes().Result.All(n => n.Equals(Myself)))); - } - Shutdown(Sys); - AfterTermination(); } + Shutdown(Sys); + AfterTermination(); + } - protected virtual TimeSpan ShutdownTimeout { get { return TimeSpan.FromSeconds(5); } } + protected virtual TimeSpan ShutdownTimeout { get { return TimeSpan.FromSeconds(5); } } - /// - /// Override this and return `true` to assert that the - /// shutdown of the `ActorSystem` was done properly. 
- /// + protected virtual bool VerifySystemShutdown { get { return false; } } - //Test Class Interface + //Test Class Interface - /// - /// Override this method to do something when the whole test is starting up. - /// - protected virtual void AtStartup() - { - } + /// + /// Override this method to do something when the whole test is starting up. + /// + protected virtual void AtStartup() + { + } + + /// + /// Override this method to do something when the whole test is terminating. + /// + protected virtual void AfterTermination() + { + } + + /// + /// All registered roles + /// + public ImmutableList Roles { get; } - /// - /// Override this method to do something when the whole test is terminating. - /// - protected virtual void AfterTermination() + /// + /// MUST BE DEFINED BY USER. + /// + /// Defines the number of participants required for starting the test. This + /// might not be equal to the number of nodes available to the test. + /// + public int InitialParticipants + { + get { + var initialParticipants = InitialParticipantsValueFactory; + if (initialParticipants <= 0) throw new InvalidOperationException("InitialParticipantsValueFactory must be populated early on, and it must be greater zero"); + if (initialParticipants > MaxNodes) throw new InvalidOperationException("not enough nodes to run this test"); + return initialParticipants; } - /// - /// All registered roles - /// - public ImmutableList Roles { get { return _roles; } } - - /// - /// MUST BE DEFINED BY USER. - /// - /// Defines the number of participants required for starting the test. This - /// might not be equals to the number of nodes available to the test. 
- /// - public int InitialParticipants - { - get - { - var initialParticipants = InitialParticipantsValueFactory; - if (initialParticipants <= 0) throw new InvalidOperationException("InitialParticipantsValueFactory must be populated early on, and it must be greater zero"); - if (initialParticipants > MaxNodes) throw new InvalidOperationException("not enough nodes to run this test"); - return initialParticipants; - } + } - } + /// + /// Must be defined by user. Creates the values used by + /// + protected abstract int InitialParticipantsValueFactory { get; } - /// - /// Must be defined by user. Creates the values used by - /// - protected abstract int InitialParticipantsValueFactory { get; } + protected TestConductor TestConductor; - protected TestConductor TestConductor; + /// + /// Execute the given block of code only on the given nodes (names according + /// to the `roleMap`). + /// + public void RunOn(Action thunk, params RoleName[] nodes) + { + if (IsNode(nodes)) thunk(); + } - /// - /// Execute the given block of code only on the given nodes (names according - /// to the `roleMap`). - /// - public void RunOn(Action thunk, params RoleName[] nodes) - { - if (IsNode(nodes)) thunk(); - } + /// + /// Execute the given block of code only on the given nodes (names according + /// to the `roleMap`). + /// + public async Task RunOnAsync(Func thunkAsync, params RoleName[] nodes) + { + if (IsNode(nodes)) await thunkAsync(); + } - /// - /// Execute the given block of code only on the given nodes (names according - /// to the `roleMap`). 
- /// - public async Task RunOnAsync(Func thunkAsync, params RoleName[] nodes) - { - if (IsNode(nodes)) await thunkAsync(); - } + /// + /// Verify that the running node matches one of the given nodes + /// + public bool IsNode(params RoleName[] nodes) + { + return nodes.Contains(Myself); + } - /// - /// Verify that the running node matches one of the given nodes - /// - public bool IsNode(params RoleName[] nodes) - { - return nodes.Contains(_myself); - } + /// + /// Enter the named barriers in the order given. Use the remaining duration from + /// the innermost enclosing `within` block or the default `BarrierTimeout` + /// + public void EnterBarrier(params string[] name) + { + TestConductor.Enter(RemainingOr(TestConductor.Settings.BarrierTimeout), Myself, name.ToImmutableList()); + } - /// - /// Enter the named barriers in the order given. Use the remaining duration from - /// the innermost enclosing `within` block or the default `BarrierTimeout` - /// - public void EnterBarrier(params string[] name) - { - TestConductor.Enter(RemainingOr(TestConductor.Settings.BarrierTimeout), Myself, name.ToImmutableList()); - } + /// + /// Async version of EnterBarrier. Enter the named barriers in the order given. + /// Use the remaining duration from the innermost enclosing `within` block or the default `BarrierTimeout` + /// + public Task EnterBarrierAsync(params string[] name) + { + return EnterBarrierAsync(CancellationToken.None, name); + } - /// - /// Query the controller for the transport address of the given node (by role name) and - /// return that as an ActorPath for easy composition: - /// - /// var serviceA = Sys.ActorSelection(Node(new RoleName("master")) / "user" / "serviceA"); - /// - public ActorPath Node(RoleName role) - { - //TODO: Async stuff here - return new RootActorPath(TestConductor.GetAddressFor(role).Result); - } + /// + /// Async version of EnterBarrier with cancellation support. Enter the named barriers in the order given. 
+ /// Use the remaining duration from the innermost enclosing `within` block or the default `BarrierTimeout` + /// + public Task EnterBarrierAsync(CancellationToken cancellationToken, params string[] name) + { + return TestConductor.EnterAsync(RemainingOr(TestConductor.Settings.BarrierTimeout), Myself, name.ToImmutableList(), cancellationToken); + } - public void MuteDeadLetters(ActorSystem system = null, params Type[] messageClasses) + /// + /// Query the controller for the transport address of the given node (by role name) and + /// return that as an ActorPath for easy composition: + /// + /// var serviceA = Sys.ActorSelection(Node(new RoleName("master")) / "user" / "serviceA"); + /// + public ActorPath Node(RoleName role) + { + return NodeAsync(role).GetAwaiter().GetResult(); + } + + /// + /// Async version of Node. Query the controller for the transport address of the given node (by role name) and + /// return that as an ActorPath for easy composition. + /// + public async Task NodeAsync(RoleName role, CancellationToken cancellationToken = default) + { + var address = await TestConductor.GetAddressForAsync(role, cancellationToken); + return new RootActorPath(address); + } + + public void MuteDeadLetters(ActorSystem system = null, params Type[] messageClasses) + { + if (system == null) system = Sys; + if (!system.Log.IsDebugEnabled) { - if (system == null) system = Sys; - if (!system.Log.IsDebugEnabled) - { - if (messageClasses.Any()) - foreach (var @class in messageClasses) EventFilter.DeadLetter(@class).Mute(); - else EventFilter.DeadLetter(typeof(object)).Mute(); - } + if (messageClasses.Any()) + foreach (var @class in messageClasses) EventFilter.DeadLetter(@class).Mute(); + else EventFilter.DeadLetter(typeof(object)).Mute(); } + } - /* - * Implementation (i.e. wait for start etc.) - */ + /* + * Implementation (i.e. wait for start etc.) 
+ */ - private readonly IPEndPoint _controllerAddr; + private readonly IPEndPoint _controllerAddr; - protected void AttachConductor(TestConductor tc) + protected void AttachConductor(TestConductor tc) + { + AttachConductorAsync(tc, CancellationToken.None).GetAwaiter().GetResult(); + } + + protected async Task AttachConductorAsync(TestConductor tc, CancellationToken cancellationToken = default) + { + using var cts = cancellationToken is { CanBeCanceled: true } + ? CancellationTokenSource.CreateLinkedTokenSource(cancellationToken) + : new CancellationTokenSource(); + cts.CancelAfter(tc.Settings.BarrierTimeout); + try { - var timeout = tc.Settings.BarrierTimeout; - try - { - //TODO: Async stuff - if (SelfIndex == 0) - tc.StartController(InitialParticipants, _myself, _controllerAddr).Wait(timeout); - else - tc.StartClient(_myself, _controllerAddr).Wait(timeout); - } - catch (Exception e) - { - throw new Exception("failure while attaching new conductor", e); - } - TestConductor = tc; + if (SelfIndex == 0) + await tc.StartControllerAsync(InitialParticipants, Myself, _controllerAddr, cts.Token); + else + await tc.StartClientAsync(Myself, _controllerAddr, cts.Token); + } + catch (Exception e) + { + throw new Exception("failure while attaching new conductor", e); } + TestConductor = tc; + } - // now add deployments, if so desired + // now add deployments, if so desired - private sealed class Replacement - { - public string Tag { get; } - public RoleName Role { get; } - private readonly Lazy _addr; - public string Addr { get { return _addr.Value; } } + private sealed class Replacement + { + public string Tag { get; } + public RoleName Role { get; } + private readonly Lazy _addr; + public string Addr { get { return _addr.Value; } } - public Replacement(string tag, RoleName role, MultiNodeSpec spec) - { - Tag = tag; - Role = role; - _addr = new Lazy(() => spec.Node(role).Address.ToString()); - } + public Replacement(string tag, RoleName role, MultiNodeSpec spec) + { + Tag = 
tag; + Role = role; + _addr = new Lazy(() => spec.Node(role).Address.ToString()); } + } - protected void InjectDeployments(ActorSystem system, RoleName role) + protected void InjectDeployments(ActorSystem system, RoleName role) + { + var deployer = system.AsInstanceOf().Provider.Deployer; + foreach (var str in _deployments(role)) { - var deployer = system.AsInstanceOf().Provider.Deployer; - foreach (var str in _deployments(role)) + var deployString = _replacements.Values.Aggregate(str, (@base, r) => { - var deployString = _replacements.Values.Aggregate(str, (@base, r) => + var indexOf = @base.IndexOf(r.Tag, StringComparison.Ordinal); + if (indexOf == -1) return @base; + string replaceWith; + try { - var indexOf = @base.IndexOf(r.Tag, StringComparison.Ordinal); - if (indexOf == -1) return @base; - string replaceWith; - try - { - replaceWith = r.Addr; - } - catch (Exception e) - { - // might happen if all test cases are ignored (excluded) and - // controller node is finished/exited before r.addr is run - // on the other nodes - var unresolved = "akka://unresolved-replacement-" + r.Role.Name; - Log.Warning(unresolved + " due to: {0}", e.ToString()); - replaceWith = unresolved; - } - return @base.Replace(r.Tag, replaceWith); - }); - foreach (var pair in ConfigurationFactory.ParseString(deployString).AsEnumerable()) + replaceWith = r.Addr; + } + catch (Exception e) { - if (pair.Value.IsObject()) - { - var deploy = - deployer.ParseConfig(pair.Key, new Config(new HoconRoot(pair.Value))); - deployer.SetDeploy(deploy); - } - else - { - throw new ArgumentException(String.Format("key {0} must map to deployment section, not simple value {1}", - pair.Key, pair.Value)); - } + // might happen if all test cases are ignored (excluded) and + // controller node is finished/exited before r.addr is run + // on the other nodes + var unresolved = "akka://unresolved-replacement-" + r.Role.Name; + Log.Warning(unresolved + " due to: {0}", e.ToString()); + replaceWith = unresolved; + } + 
return @base.Replace(r.Tag, replaceWith); + }); + foreach (var pair in ConfigurationFactory.ParseString(deployString).AsEnumerable()) + { + if (pair.Value.IsObject()) + { + var deploy = deployer.ParseConfig(pair.Key, new Config(new HoconRoot(pair.Value))); + deployer.SetDeploy(deploy); + } + else + { + throw new ArgumentException($"key {pair.Key} must map to deployment section, not simple value {pair.Value}"); } } } + } - protected ActorSystem StartNewSystem() - { - var sb = - new StringBuilder("akka.remote.dot-netty.tcp{").AppendLine() - .AppendFormat("port={0}", _myAddress.Port) - .AppendLine() - .AppendFormat(@"hostname=""{0}""", _myAddress.Host) - .AppendLine("}"); - var config = - ConfigurationFactory + protected ActorSystem StartNewSystem() + { + return StartNewSystemAsync(CancellationToken.None).GetAwaiter().GetResult(); + } + + protected async Task StartNewSystemAsync(CancellationToken cancellationToken = default) + { + var sb = + new StringBuilder("akka.remote.dot-netty.tcp{").AppendLine() + .AppendFormat("port={0}", _myAddress.Port) + .AppendLine() + .AppendFormat(@"hostname=""{0}""", _myAddress.Host) + .AppendLine("}"); + var config = + ConfigurationFactory .ParseString(sb.ToString()) .WithFallback(Sys.Settings.Config); - var system = ActorSystem.Create(Sys.Name, config); - InjectDeployments(system, _myself); - AttachConductor(new TestConductor(system)); - return system; - } - + var system = ActorSystem.Create(Sys.Name, config); + InjectDeployments(system, Myself); + await AttachConductorAsync(new TestConductor(system), cancellationToken); + return system; + } - public void Dispose() - { - Dispose(true); - //Take this object off the finalization queue and prevent finalization code for this object - //from executing a second time. - GC.SuppressFinalize(this); - } + public void Dispose() + { + Dispose(true); + //Take this object off the finalization queue and prevent finalization code for this object + //from executing a second time. 
+ GC.SuppressFinalize(this); + } - /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. - /// if set to true the method has been called directly or indirectly by a - /// user's code. Managed and unmanaged resources will be disposed.
- /// if set to false the method has been called by the runtime from inside the finalizer and only - /// unmanaged resources can be disposed. - protected void Dispose(bool disposing) - { - // If disposing equals false, the method has been called by the - // runtime from inside the finalizer and you should not reference - // other objects. Only unmanaged resources can be disposed. + /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. + /// if set to true the method has been called directly or indirectly by a + /// user's code. Managed and unmanaged resources will be disposed.
+ /// if set to false the method has been called by the runtime from inside the finalizer and only + /// unmanaged resources can be disposed. + protected void Dispose(bool disposing) + { + // If disposing equals false, the method has been called by the + // runtime from inside the finalizer and you should not reference + // other objects. Only unmanaged resources can be disposed. - //Make sure Dispose does not get called more than once, by checking the disposed field - if (!_isDisposed) + //Make sure Dispose does not get called more than once, by checking the disposed field + if (!_isDisposed) + { + if (disposing) { - if (disposing) - { - Console.WriteLine("---------------DISPOSING--------------------"); - MultiNodeSpecAfterAll(); - } + Console.WriteLine("---------------DISPOSING--------------------"); + MultiNodeSpecAfterAll(); } - _isDisposed = true; } + _isDisposed = true; } +} - //TODO: Improve docs +//TODO: Improve docs +/// +/// Use this to hook into your test framework lifecycle +/// +public interface IMultiNodeSpecCallbacks +{ /// - /// Use this to hook into your test framework lifecycle + /// Call this before the start of the test run. NOT before every test case. /// - public interface IMultiNodeSpecCallbacks - { - /// - /// Call this before the start of the test run. NOT before every test case. - /// - void MultiNodeSpecBeforeAll(); - - /// - /// Call this after the all test cases have run. NOT after every test case. - /// - void MultiNodeSpecAfterAll(); - } -} + void MultiNodeSpecBeforeAll(); + /// + /// Call this after the all test cases have run. NOT after every test case. 
+ /// + void MultiNodeSpecAfterAll(); +} \ No newline at end of file diff --git a/src/core/Akka.Remote.TestKit/Player.cs b/src/core/Akka.Remote.TestKit/Player.cs index dd182dd9eff..4ebaf38d4c0 100644 --- a/src/core/Akka.Remote.TestKit/Player.cs +++ b/src/core/Akka.Remote.TestKit/Player.cs @@ -22,572 +22,646 @@ using DotNetty.Transport.Channels; using Akka.Configuration; -namespace Akka.Remote.TestKit +namespace Akka.Remote.TestKit; + +/// +/// The Player is the client component of the +/// test conductor extension. It registers with +/// the conductor's controller +/// in order to participate in barriers and enable network failure injection +/// +partial class TestConductor //Player trait in JVM version { - /// - /// The Player is the client component of the - /// test conductor extension. It registers with - /// the conductor's controller - /// in order to participate in barriers and enable network failure injection - /// - partial class TestConductor //Player trait in JVM version - { - private IActorRef _client; + private IActorRef _client; - public IActorRef Client + public IActorRef Client + { + get { - get - { - if(_client == null) throw new IllegalStateException("TestConductor client not yet started"); - if(_system.WhenTerminated.IsCompleted) throw new IllegalStateException("TestConductor unavailable because system is terminated; you need to StartNewSystem() before this point"); - return _client; - } + if(_client == null) throw new IllegalStateException("TestConductor client not yet started"); + if(_system.WhenTerminated.IsCompleted) throw new IllegalStateException("TestConductor unavailable because system is terminated; you need to StartNewSystem() before this point"); + return _client; } + } - /// - /// Connect to the conductor on the given port (the host is taken from setting - /// `akka.testconductor.host`). 
The connection is made asynchronously, but you - /// should await completion of the returned Future because that implies that - /// all expected participants of this test have successfully connected (i.e. - /// this is a first barrier in itself). The number of expected participants is - /// set in `.startController()`. - /// - public Task StartClient(RoleName name, IPEndPoint controllerAddr) - { - if(_client != null) throw new IllegalStateException("TestConductorClient already started"); - _client = - _system.ActorOf(Props.Create(() => new ClientFSM(name, controllerAddr)), "TestConductorClient"); - - var a = _system.ActorOf(Props.Create()); + /// + /// Connect to the conductor on the given port (the host is taken from setting + /// `akka.testconductor.host`). The connection is made asynchronously, but you + /// should await completion of the returned Future because that implies that + /// all expected participants of this test have successfully connected (i.e. + /// this is a first barrier in itself). The number of expected participants is + /// set in `.startController()`. + /// + public Task StartClient(RoleName name, IPEndPoint controllerAddr) + { + // Use the async version with no cancellation token for consistency + return StartClientAsync(name, controllerAddr, CancellationToken.None); + } - return a.Ask(_client); - } + /// + /// Connect to the conductor on the given port (the host is taken from setting + /// `akka.testconductor.host`). The connection is made asynchronously, but you + /// should await completion of the returned Future because that implies that + /// all expected participants of this test have successfully connected (i.e. + /// this is a first barrier in itself). The number of expected participants is + /// set in `.startController()`. 
+ /// + public Task StartClientAsync(RoleName name, IPEndPoint controllerAddr, CancellationToken cancellationToken = default) + { + if(_client != null) + throw new IllegalStateException("TestConductorClient already started"); + + _client = _system.ActorOf(Props.Create(() => new ClientFSM(name, controllerAddr)), "TestConductorClient"); + + var a = _system.ActorOf(Props.Create()); + return a.Ask(_client, cancellationToken); + } - private class WaitForClientFSMToConnect : UntypedActor - { - IActorRef _waiting; + private class WaitForClientFSMToConnect : UntypedActor + { + IActorRef _waiting; - protected override void OnReceive(object message) + protected override void OnReceive(object message) + { + if (message is IActorRef fsm) { - if (message is IActorRef fsm) - { - _waiting = Sender; - fsm.Tell(new FSMBase.SubscribeTransitionCallBack(Self)); - return; - } + _waiting = Sender; + fsm.Tell(new FSMBase.SubscribeTransitionCallBack(Self)); + return; + } - if (message is FSMBase.Transition transition) + if (message is FSMBase.Transition transition) + { + switch (transition.From) { - switch (transition.From) - { - case ClientFSM.State.Connecting when transition.To == ClientFSM.State.AwaitDone: - return; - case ClientFSM.State.AwaitDone when transition.To == ClientFSM.State.Connected: - _waiting.Tell(Done.Instance); - Context.Stop(Self); - return; - default: - _waiting.Tell(new Exception("unexpected transition: " + transition)); - Context.Stop(Self); - break; - } + case ClientFSM.State.Connecting when transition.To == ClientFSM.State.AwaitDone: + return; + case ClientFSM.State.AwaitDone when transition.To == ClientFSM.State.Connected: + _waiting.Tell(Done.Instance); + Context.Stop(Self); + return; + default: + _waiting.Tell(new Exception("unexpected transition: " + transition)); + Context.Stop(Self); + break; } - - if (message is not FSMBase.CurrentState { State: ClientFSM.State.Connected }) return; - _waiting.Tell(Done.Instance); - Context.Stop(Self); } - } - /// - /// 
Enter the named barriers, one after the other, in the order given. Will - /// throw an exception in case of timeouts or other errors. - /// - public void Enter(RoleName roleName, string name) - { - Enter(Settings.BarrierTimeout, roleName, ImmutableList.Create(name)); + if (message is not FSMBase.CurrentState { State: ClientFSM.State.Connected }) return; + _waiting.Tell(Done.Instance); + Context.Stop(Self); } + } - /// - /// Enter the named barriers, one after the other, in the order given. Will - /// throw an exception in case of timeouts or other errors. - /// - public void Enter(TimeSpan timeout, RoleName roleName, ImmutableList names) + /// + /// Enter the named barriers, one after the other, in the order given. Will + /// throw an exception in case of timeouts or other errors. + /// + public void Enter(RoleName roleName, string name) + { + // Use sync-over-async pattern to maintain single source of truth + try { - _system.Log.Debug("entering barriers {0}", names.Aggregate((a, b) => "(" + a + "," + b + ")")); - var stop = Deadline.Now + timeout; - - foreach (var name in names) - { - var barrierTimeout = stop.TimeLeft; - if (barrierTimeout.Ticks < 0) - { - _client.Tell(new ToServer(new FailBarrier(name, roleName))); - throw new TimeoutException("Server timed out while waiting for barrier " + name); - } - try - { - var askTimeout = barrierTimeout + Settings.QueryTimeout; - // Need to force barrier to wait here, so we can pass along a "fail barrier" message in the event - // of a failed operation - var result = _client.Ask(new ToServer(new EnterBarrier(name, barrierTimeout, roleName)), askTimeout).Result; - } - catch (AggregateException ex) - { - _client.Tell(new ToServer(new FailBarrier(name, roleName))); - throw new TimeoutException("Client timed out while waiting for barrier " + name, ex); - } - catch (OperationCanceledException) - { - _system.Log.Debug("OperationCanceledException was thrown instead of AggregateException"); - } - _system.Log.Debug("passed 
barrier {0}", name); - } + EnterAsync(Settings.BarrierTimeout, roleName, ImmutableList.Create(name), CancellationToken.None).GetAwaiter().GetResult(); } - - public Task
GetAddressFor(RoleName name) + catch (AggregateException ex) when (ex.InnerException != null) { - return _client.Ask
(new ToServer(new GetAddress(name)), Settings.QueryTimeout); + throw ex.InnerException; } } /// - /// This is the controlling entity on the player - /// side: in a first step it registers itself with a symbolic name and its remote - /// address at the , then waits for the - /// `Done` message which signals that all other expected test participants have - /// done the same. After that, it will pass barrier requests to and from the - /// coordinator and react to the Conductors’s - /// requests for failure injection. - /// - /// Note that you can't perform requests concurrently, e.g. enter barrier - /// from one thread and ask for node address from another thread. - /// - /// INTERNAL API. + /// Async version of Enter. Enter the named barrier. + /// Will throw an exception in case of timeouts or other errors. /// - [InternalApi] - internal class ClientFSM : FSM, ILoggingFSM + public Task EnterAsync(RoleName roleName, string name, CancellationToken cancellationToken = default) { - public enum State + return EnterAsync(Settings.BarrierTimeout, roleName, ImmutableList.Create(name), cancellationToken); + } + + /// + /// Enter the named barriers, one after the other, in the order given. Will + /// throw an exception in case of timeouts or other errors. + /// + public void Enter(TimeSpan timeout, RoleName roleName, ImmutableList names) + { + // Use sync-over-async pattern to maintain single source of truth + try { - Connecting, - AwaitDone, - Connected, - Failed + EnterAsync(timeout, roleName, names, CancellationToken.None).GetAwaiter().GetResult(); } - - internal class Data + catch (AggregateException ex) when (ex.InnerException != null) { - readonly IChannel _channel; - public IChannel Channel { get { return _channel; } } - readonly (string, IActorRef)? _runningOp; - public (string, IActorRef)? RunningOp => _runningOp; + throw ex.InnerException; + } + } + + /// + /// Async version of Enter. Enter the named barriers, one after the other, in the order given. 
+ /// Will throw an exception in case of timeouts or other errors. + /// + public async Task EnterAsync(TimeSpan timeout, RoleName roleName, ImmutableList names, CancellationToken cancellationToken = default) + { + _system.Log.Debug("entering barriers {0}", names.Aggregate((a, b) => "(" + a + "," + b + ")")); + var stop = Deadline.Now + timeout; - public Data(IChannel channel, (string, IActorRef)? runningOp) + foreach (var name in names) + { + var barrierTimeout = stop.TimeLeft; + if (barrierTimeout.Ticks < 0) { - _channel = channel; - _runningOp = runningOp; + _client.Tell(new ToServer(new FailBarrier(name, roleName))); + throw new TimeoutException("Server timed out while waiting for barrier " + name); } - - private bool Equals(Data other) + try { - return Equals(_channel, other._channel) && Equals(_runningOp, other._runningOp); + var askTimeout = barrierTimeout + Settings.QueryTimeout; + // Use async ask with cancellation token + var result = await _client.Ask(new ToServer(new EnterBarrier(name, barrierTimeout, roleName)), askTimeout, cancellationToken); } - - /// - public override bool Equals(object obj) + catch (TaskCanceledException ex) { - if (ReferenceEquals(null, obj)) return false; - if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != GetType()) return false; - return Equals((Data) obj); + _client.Tell(new ToServer(new FailBarrier(name, roleName))); + throw new TimeoutException("Client timed out while waiting for barrier " + name, ex); } - - /// - public override int GetHashCode() + catch (OperationCanceledException ex) { - unchecked - { - return ((_channel != null ? _channel.GetHashCode() : 0) * 397) - ^ (_runningOp != null ? _runningOp.GetHashCode() : 0); - } + _client.Tell(new ToServer(new FailBarrier(name, roleName))); + throw new TimeoutException("Operation was cancelled while waiting for barrier " + name, ex); } + _system.Log.Debug("passed barrier {0}", name); + } + } - /// - /// Compares two specified for equality. 
- /// - /// The first used for comparison - /// The second used for comparison - /// true if both are equal; otherwise false - public static bool operator ==(Data left, Data right) - { - return Equals(left, right); - } + public Task
GetAddressFor(RoleName name) + { + return GetAddressForAsync(name, CancellationToken.None); + } - /// - /// Compares two specified for inequality. - /// - /// The first used for comparison - /// The second used for comparison - /// true if both are not equal; otherwise false - public static bool operator !=(Data left, Data right) - { - return !Equals(left, right); - } + /// + /// Async version of GetAddressFor with cancellation token support. + /// + public Task
GetAddressForAsync(RoleName name, CancellationToken cancellationToken = default) + { + return _client.Ask
(new ToServer(new GetAddress(name)), Settings.QueryTimeout, cancellationToken); + } +} - public Data Copy((string, IActorRef)? runningOp) - { - return new Data(Channel, runningOp); - } +/// +/// This is the controlling entity on the player +/// side: in a first step it registers itself with a symbolic name and its remote +/// address at the , then waits for the +/// `Done` message which signals that all other expected test participants have +/// done the same. After that, it will pass barrier requests to and from the +/// coordinator and react to the Conductors’s +/// requests for failure injection. +/// +/// Note that you can't perform requests concurrently, e.g. enter barrier +/// from one thread and ask for node address from another thread. +/// +/// INTERNAL API. +/// +[InternalApi] +internal class ClientFSM : FSM, ILoggingFSM +{ + public enum State + { + Connecting, + AwaitDone, + Connected, + Failed + } + + internal class Data + { + public IChannel Channel { get; } + public (string, IActorRef)? RunningOp { get; } + + public Data(IChannel channel, (string, IActorRef)? runningOp) + { + Channel = channel; + RunningOp = runningOp; + } + + private bool Equals(Data other) + { + return Equals(Channel, other.Channel) && Equals(RunningOp, other.RunningOp); } - internal class Connected : INoSerializationVerificationNeeded + /// + public override bool Equals(object obj) { - readonly IChannel _channel; - public IChannel Channel{get { return _channel; }} + if (ReferenceEquals(null, obj)) return false; + if (ReferenceEquals(this, obj)) return true; + return obj is Data data && Equals(data); + } - public Connected(IChannel channel) + /// + public override int GetHashCode() + { + unchecked { - _channel = channel; + return ((Channel != null ? Channel.GetHashCode() : 0) * 397) + ^ (RunningOp != null ? RunningOp.GetHashCode() : 0); } + } - protected bool Equals(Connected other) - { - return Equals(_channel, other._channel); - } + /// + /// Compares two specified for equality. 
+ /// + /// The first used for comparison + /// The second used for comparison + /// true if both are equal; otherwise false + public static bool operator ==(Data left, Data right) + { + return Equals(left, right); + } - /// - public override bool Equals(object obj) - { - if (ReferenceEquals(null, obj)) return false; - if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != this.GetType()) return false; - return Equals((Connected) obj); - } + /// + /// Compares two specified for inequality. + /// + /// The first used for comparison + /// The second used for comparison + /// true if both are not equal; otherwise false + public static bool operator !=(Data left, Data right) + { + return !Equals(left, right); + } - /// - public override int GetHashCode() - { - return (_channel != null ? _channel.GetHashCode() : 0); - } + public Data Copy((string, IActorRef)? runningOp) + { + return new Data(Channel, runningOp); + } + } - /// - /// Compares two specified for equality. - /// - /// The first used for comparison - /// The second used for comparison - /// true if both are equal; otherwise false - public static bool operator ==(Connected left, Connected right) - { - return Equals(left, right); - } + internal class Connected : INoSerializationVerificationNeeded + { + public IChannel Channel { get; } - /// - /// Compares two specified for inequality. - /// - /// The first used for comparison - /// The second used for comparison - /// true if both are not equal; otherwise false - public static bool operator !=(Connected left, Connected right) - { - return !Equals(left, right); - } + public Connected(IChannel channel) + { + Channel = channel; } - /// - /// TBD - /// - internal class ConnectionFailure : Exception + protected bool Equals(Connected other) { - /// - /// Initializes a new instance of the class. - /// - /// The message that describes the error. 
- public ConnectionFailure(string message) : base(message) - { - } + return Equals(Channel, other.Channel); } - internal class Disconnected + /// + public override bool Equals(object obj) { - private Disconnected() { } - public static Disconnected Instance { get; } = new(); + if (ReferenceEquals(null, obj)) return false; + if (ReferenceEquals(this, obj)) return true; + return obj is Connected connected && Equals(connected); } - private readonly ILoggingAdapter _log = Context.GetLogger(); - readonly TestConductorSettings _settings; - readonly PlayerHandler _handler; - readonly RoleName _name; + /// + public override int GetHashCode() + { + return (Channel != null ? Channel.GetHashCode() : 0); + } - public ClientFSM(RoleName name, IPEndPoint controllerAddr) + /// + /// Compares two specified for equality. + /// + /// The first used for comparison + /// The second used for comparison + /// true if both are equal; otherwise false + public static bool operator ==(Connected left, Connected right) { - _settings = TestConductor.Get(Context.System).Settings; - _handler = new PlayerHandler(controllerAddr, _settings.ClientReconnects, _settings.ReconnectBackoff, - _settings.ClientSocketWorkerPoolSize, Self, Logging.GetLogger(Context.System, "PlayerHandler"), - Context.System.Scheduler); - _name = name; + return Equals(left, right); + } + + /// + /// Compares two specified for inequality. + /// + /// The first used for comparison + /// The second used for comparison + /// true if both are not equal; otherwise false + public static bool operator !=(Connected left, Connected right) + { + return !Equals(left, right); + } + } - InitFSM(); + /// + /// TBD + /// + internal class ConnectionFailure : Exception + { + /// + /// Initializes a new instance of the class. + /// + /// The message that describes the error. 
+ public ConnectionFailure(string message) : base(message) + { } + } + + internal class Disconnected + { + private Disconnected() { } + public static Disconnected Instance { get; } = new(); + } + + private readonly ILoggingAdapter _log = Context.GetLogger(); + private readonly TestConductorSettings _settings; + private readonly PlayerHandler _handler; + private readonly RoleName _name; + + public ClientFSM(RoleName name, IPEndPoint controllerAddr) + { + _settings = TestConductor.Get(Context.System).Settings; + _handler = new PlayerHandler(controllerAddr, _settings.ClientReconnects, _settings.ReconnectBackoff, + _settings.ClientSocketWorkerPoolSize, Self, Logging.GetLogger(Context.System, "PlayerHandler"), + Context.System.Scheduler); + _name = name; + + InitFSM(); + } + + public void InitFSM() + { + StartWith(State.Connecting, new Data(null, null)); - public void InitFSM() + When(State.Connecting, @event => { - StartWith(State.Connecting, new Data(null, null)); + if (@event.FsmEvent is IClientOp) + { + return Stay().Replying(new Status.Failure(new IllegalStateException("not connected yet"))); + } + var connected = @event.FsmEvent as Connected; + if (connected != null) + { + connected.Channel.WriteAndFlushAsync(new Hello(_name.Name, TestConductor.Get(Context.System).Address)); + return GoTo(State.AwaitDone).Using(new Data(connected.Channel, null)); + } + if (@event.FsmEvent is ConnectionFailure) + { + return GoTo(State.Failed); + } + if (@event.FsmEvent is StateTimeout) + { + _log.Error($"Failed to connect to test conductor within {_settings.ConnectTimeout.TotalMilliseconds} ms."); + return GoTo(State.Failed); + } + + return null; + }, _settings.ConnectTimeout); - When(State.Connecting, @event => + When(State.AwaitDone, @event => + { + switch (@event.FsmEvent) { - if (@event.FsmEvent is IClientOp) - { - return Stay().Replying(new Status.Failure(new IllegalStateException("not connected yet"))); - } - var connected = @event.FsmEvent as Connected; - if (connected != 
null) - { - connected.Channel.WriteAndFlushAsync(new Hello(_name.Name, TestConductor.Get(Context.System).Address)); - return GoTo(State.AwaitDone).Using(new Data(connected.Channel, null)); - } - if (@event.FsmEvent is ConnectionFailure) - { + case Done: + _log.Debug("received Done: starting test"); + return GoTo(State.Connected); + case INetworkOp: + _log.Error("Received {0} instead of Done", @event.FsmEvent); return GoTo(State.Failed); - } - if (@event.FsmEvent is StateTimeout) - { - _log.Error($"Failed to connect to test conductor within {_settings.ConnectTimeout.TotalMilliseconds} ms."); + case IServerOp: + return Stay().Replying(new Failure(new IllegalStateException("not connected yet"))); + case StateTimeout: + _log.Error("connect timeout to TestConductor"); return GoTo(State.Failed); - } - - return null; - }, _settings.ConnectTimeout); + default: + return null; + } + }, _settings.BarrierTimeout); - When(State.AwaitDone, @event => + When(State.Connected, @event => + { + if (@event.FsmEvent is Disconnected) { - switch (@event.FsmEvent) - { - case Done: - _log.Debug("received Done: starting test"); - return GoTo(State.Connected); - case INetworkOp: - _log.Error("Received {0} instead of Done", @event.FsmEvent); - return GoTo(State.Failed); - case IServerOp: - return Stay().Replying(new Failure(new IllegalStateException("not connected yet"))); - case StateTimeout: - _log.Error("connect timeout to TestConductor"); - return GoTo(State.Failed); - default: - return null; - } - }, _settings.BarrierTimeout); - - When(State.Connected, @event => + _log.Info("disconnected from TestConductor"); + throw new ConnectionFailure("disconnect"); + } + if(@event.FsmEvent is ToServer && @event.StateData.Channel != null) { - if (@event.FsmEvent is Disconnected) - { - _log.Info("disconnected from TestConductor"); - throw new ConnectionFailure("disconnect"); - } - if(@event.FsmEvent is ToServer && @event.StateData.Channel != null) + 
@event.StateData.Channel.WriteAndFlushAsync(Done.Instance); + return Stay(); + } + var toServer = @event.FsmEvent as IToServer; + if (toServer != null && @event.StateData.Channel != null && + @event.StateData.RunningOp == null) + { + @event.StateData.Channel.WriteAndFlushAsync(toServer.Msg); + string token = null; + var enterBarrier = @event.FsmEvent as ToServer; + if (enterBarrier != null) token = enterBarrier.Msg.Name; + else { - @event.StateData.Channel.WriteAndFlushAsync(Done.Instance); - return Stay(); + var getAddress = @event.FsmEvent as ToServer; + if (getAddress != null) token = getAddress.Msg.Node.Name; } - var toServer = @event.FsmEvent as IToServer; - if (toServer != null && @event.StateData.Channel != null && - @event.StateData.RunningOp == null) + return Stay().Using(@event.StateData.Copy(runningOp: (token, Sender))); + } + if (toServer != null && @event.StateData.Channel != null && + @event.StateData.RunningOp != null) + { + _log.Error("cannot write {0} while waiting for {1}", toServer.Msg, @event.StateData.RunningOp); + return Stay(); + } + if (@event.FsmEvent is IClientOp && @event.StateData.Channel != null) + { + var barrierResult = @event.FsmEvent as BarrierResult; + if (barrierResult != null) { - @event.StateData.Channel.WriteAndFlushAsync(toServer.Msg); - string token = null; - var enterBarrier = @event.FsmEvent as ToServer; - if (enterBarrier != null) token = enterBarrier.Msg.Name; - else + if (@event.StateData.RunningOp == null) { - var getAddress = @event.FsmEvent as ToServer; - if (getAddress != null) token = getAddress.Msg.Node.Name; + _log.Warning("did not expect {0}", @event.FsmEvent); } - return Stay().Using(@event.StateData.Copy(runningOp: (token, Sender))); - } - if (toServer != null && @event.StateData.Channel != null && - @event.StateData.RunningOp != null) - { - _log.Error("cannot write {0} while waiting for {1}", toServer.Msg, @event.StateData.RunningOp); - return Stay(); - } - if (@event.FsmEvent is IClientOp && 
@event.StateData.Channel != null) - { - var barrierResult = @event.FsmEvent as BarrierResult; - if (barrierResult != null) + else { - if (@event.StateData.RunningOp == null) + object response; + if (barrierResult.Name != @event.StateData.RunningOp.Value.Item1) { - _log.Warning("did not expect {0}", @event.FsmEvent); + response = + new Failure( + new Exception("wrong barrier " + barrierResult + " received while waiting for " + + @event.StateData.RunningOp.Value.Item1)); } - else - { - object response; - if (barrierResult.Name != @event.StateData.RunningOp.Value.Item1) - { - response = - new Failure( - new Exception("wrong barrier " + barrierResult + " received while waiting for " + - @event.StateData.RunningOp.Value.Item1)); - } - else if (!barrierResult.Success) - { - response = - new Failure( - new Exception("barrier failed:" + @event.StateData.RunningOp.Value.Item1)); - } - else - { - response = barrierResult.Name; - } - @event.StateData.RunningOp.Value.Item2.Tell(response); - } - return Stay().Using(@event.StateData.Copy(runningOp: null)); - } - var addressReply = @event.FsmEvent as AddressReply; - if (addressReply != null) - { - if (@event.StateData.RunningOp == null) + else if (!barrierResult.Success) { - _log.Warning("did not expect {0}", @event.FsmEvent); + response = + new Failure( + new Exception("barrier failed:" + @event.StateData.RunningOp.Value.Item1)); } else { - @event.StateData.RunningOp.Value.Item2.Tell(addressReply.Addr); + response = barrierResult.Name; } - return Stay().Using(@event.StateData.Copy(runningOp: null)); + @event.StateData.RunningOp.Value.Item2.Tell(response); } - var throttleMsg = @event.FsmEvent as ThrottleMsg; - if (@event.FsmEvent is ThrottleMsg) + return Stay().Using(@event.StateData.Copy(runningOp: null)); + } + var addressReply = @event.FsmEvent as AddressReply; + if (addressReply != null) + { + if (@event.StateData.RunningOp == null) { - ThrottleMode mode; - if (throttleMsg.RateMBit < 0.0f) mode = Unthrottled.Instance; - else 
if (throttleMsg.RateMBit == 0.0f) mode = Blackhole.Instance; - else mode = new Transport.TokenBucket(1000, throttleMsg.RateMBit*125000, 0, 0); - var cmdTask = - TestConductor.Get(Context.System) - .Transport.ManagementCommand(new SetThrottle(throttleMsg.Target, throttleMsg.Direction, - mode)); - - var self = Self; - cmdTask.ContinueWith(t => - { - if (t.IsFaulted) - throw new ConfigurationException("Throttle was requested from the TestConductor, but no transport " + - "adapters available that support throttling. Specify 'testTransport(on=true)' in your MultiNodeConfig"); - self.Tell(new ToServer(Done.Instance)); - }); - return Stay(); + _log.Warning("did not expect {0}", @event.FsmEvent); } - if (@event.FsmEvent is DisconnectMsg) - return Stay(); //FIXME is this the right EC for the future below? - var terminateMsg = @event.FsmEvent as TerminateMsg; - if (terminateMsg != null) + else { - _log.Info("Received TerminateMsg - shutting down..."); - if (terminateMsg.ShutdownOrExit.IsLeft && terminateMsg.ShutdownOrExit.ToLeft().Value == false) - { - Context.System.Terminate(); - return Stay(); - } - if (terminateMsg.ShutdownOrExit.IsLeft && terminateMsg.ShutdownOrExit.ToLeft().Value == true) - { - Context.System.AsInstanceOf().Abort(); - return Stay(); - } - if (terminateMsg.ShutdownOrExit.IsRight) - { - Environment.Exit(terminateMsg.ShutdownOrExit.ToRight().Value); - return Stay(); - } + @event.StateData.RunningOp.Value.Item2.Tell(addressReply.Addr); } - if (@event.FsmEvent is Done) return Stay(); //FIXME what should happen? 
+ return Stay().Using(@event.StateData.Copy(runningOp: null)); } - return null; - }); - - When(State.Failed, @event => - { - if (@event.FsmEvent is IClientOp) + var throttleMsg = @event.FsmEvent as ThrottleMsg; + if (@event.FsmEvent is ThrottleMsg) { - return Stay().Replying(new Status.Failure(new Exception("cannot do " + @event.FsmEvent + " while failed"))); + ThrottleMode mode; + if (throttleMsg.RateMBit < 0.0f) mode = Unthrottled.Instance; + else if (throttleMsg.RateMBit == 0.0f) mode = Blackhole.Instance; + else mode = new Transport.TokenBucket(1000, throttleMsg.RateMBit*125000, 0, 0); + var cmdTask = + TestConductor.Get(Context.System) + .Transport.ManagementCommand(new SetThrottle(throttleMsg.Target, throttleMsg.Direction, + mode)); + + var self = Self; + cmdTask.ContinueWith(t => + { + if (t.IsFaulted) + throw new ConfigurationException("Throttle was requested from the TestConductor, but no transport " + + "adapters available that support throttling. Specify 'testTransport(on=true)' in your MultiNodeConfig"); + self.Tell(new ToServer(Done.Instance)); + }); + return Stay(); } - if (@event.FsmEvent is INetworkOp) + if (@event.FsmEvent is DisconnectMsg) + return Stay(); //FIXME is this the right EC for the future below? 
+ var terminateMsg = @event.FsmEvent as TerminateMsg; + if (terminateMsg != null) { - _log.Warning("ignoring network message {0} while Failed", @event.FsmEvent); - return Stay(); + _log.Info("Received TerminateMsg - shutting down..."); + if (terminateMsg.ShutdownOrExit.IsLeft && terminateMsg.ShutdownOrExit.ToLeft().Value == false) + { + Context.System.Terminate(); + return Stay(); + } + if (terminateMsg.ShutdownOrExit.IsLeft && terminateMsg.ShutdownOrExit.ToLeft().Value == true) + { + Context.System.AsInstanceOf().Abort(); + return Stay(); + } + if (terminateMsg.ShutdownOrExit.IsRight) + { + Environment.Exit(terminateMsg.ShutdownOrExit.ToRight().Value); + return Stay(); + } } - return null; - }); + if (@event.FsmEvent is Done) return Stay(); //FIXME what should happen? + } + return null; + }); - OnTermination(e => + When(State.Failed, @event => + { + if (@event.FsmEvent is IClientOp) { - _log.Info("Terminating connection to multi-node test controller due to [{0}]", e.Reason); - if (e.StateData.Channel != null) + return Stay().Replying(new Status.Failure(new Exception("cannot do " + @event.FsmEvent + " while failed"))); + } + if (@event.FsmEvent is INetworkOp) + { + _log.Warning("ignoring network message {0} while Failed", @event.FsmEvent); + return Stay(); + } + return null; + }); + + OnTermination(e => + { + _log.Info("Terminating connection to multi-node test controller due to [{0}]", e.Reason); + if (e.StateData.Channel != null) + { + var disconnectTimeout = TimeSpan.FromSeconds(2); //todo: make into setting loaded from HOCON + if (!e.StateData.Channel.CloseAsync().Wait(disconnectTimeout)) { - var disconnectTimeout = TimeSpan.FromSeconds(2); //todo: make into setting loaded from HOCON - if (!e.StateData.Channel.CloseAsync().Wait(disconnectTimeout)) - { - _log.Warning("Failed to disconnect from conductor within {0}", disconnectTimeout); - } + _log.Warning("Failed to disconnect from conductor within {0}", disconnectTimeout); } - }); + } + }); - Initialize(); - } + 
Initialize(); } +} + +/// +/// This handler only forwards messages received from the conductor to the +/// +/// INTERNAL API. +/// +internal class PlayerHandler : ChannelHandlerAdapter +{ + private readonly IPEndPoint _server; + private int _reconnects; + private readonly TimeSpan _backoff; + private readonly int _poolSize; + private readonly IActorRef _fsm; + private readonly ILoggingAdapter _log; + private readonly IScheduler _scheduler; + private bool _loggedDisconnect = false; + + private Deadline _nextAttempt; /// - /// This handler only forwards messages received from the conductor to the - /// - /// INTERNAL API. + /// Shareable, since the handler may be added multiple times during reconnect /// - internal class PlayerHandler : ChannelHandlerAdapter - { - private readonly IPEndPoint _server; - private int _reconnects; - private readonly TimeSpan _backoff; - private readonly int _poolSize; - private readonly IActorRef _fsm; - private readonly ILoggingAdapter _log; - private readonly IScheduler _scheduler; - private bool _loggedDisconnect = false; + public override bool IsSharable => true; - private Deadline _nextAttempt; - - /// - /// Shareable, since the handler may be added multiple times during reconnect - /// - public override bool IsSharable => true; + public PlayerHandler(IPEndPoint server, int reconnects, TimeSpan backoff, int poolSize, IActorRef fsm, + ILoggingAdapter log, IScheduler scheduler) + { + _server = server; + _reconnects = reconnects; + _backoff = backoff; + _poolSize = poolSize; + _fsm = fsm; + _log = log; + _scheduler = scheduler; + + Reconnect(); + } - public PlayerHandler(IPEndPoint server, int reconnects, TimeSpan backoff, int poolSize, IActorRef fsm, - ILoggingAdapter log, IScheduler scheduler) - { - _server = server; - _reconnects = reconnects; - _backoff = backoff; - _poolSize = poolSize; - _fsm = fsm; - _log = log; - _scheduler = scheduler; - - Reconnect(); - } + private static string FormatConnectionFailure(IChannelHandlerContext 
context, Exception exception) + { + var sb = new StringBuilder(); + sb.AppendLine($"Connection between [Local: {context.Channel.LocalAddress}] and [Remote: {context.Channel.RemoteAddress}] has failed."); + sb.AppendLine($"Cause: {exception}"); + sb.AppendLine($"Trace: {exception.StackTrace}"); + return sb.ToString(); + } - private static string FormatConnectionFailure(IChannelHandlerContext context, Exception exception) + public override void ExceptionCaught(IChannelHandlerContext context, Exception exception) + { + _log.Debug("channel {0} exception {1}", context.Channel, exception); + if (exception is ConnectException && _reconnects > 0) { - var sb = new StringBuilder(); - sb.AppendLine($"Connection between [Local: {context.Channel.LocalAddress}] and [Remote: {context.Channel.RemoteAddress}] has failed."); - sb.AppendLine($"Cause: {exception}"); - sb.AppendLine($"Trace: {exception.StackTrace}"); - return sb.ToString(); + _reconnects -= 1; + if (_nextAttempt.IsOverdue) + { + Reconnect(); + } + else + { + _scheduler.Advanced.ScheduleOnce(_nextAttempt.TimeLeft, Reconnect); + } + return; } + _fsm.Tell(new ClientFSM.ConnectionFailure(FormatConnectionFailure(context, exception))); + } - public override void ExceptionCaught(IChannelHandlerContext context, Exception exception) + private void Reconnect() + { + _log.Debug("Connecting..."); + _nextAttempt = Deadline.Now + _backoff; + RemoteConnection.CreateConnection(Role.Client, _server, _poolSize, this).ContinueWith(_ => { - _log.Debug("channel {0} exception {1}", context.Channel, exception); - if (exception is ConnectException && _reconnects > 0) + _log.Debug("Failed to connect.... Retrying again in {0}s. 
{1} attempts left.", _nextAttempt.TimeLeft,_reconnects); + if (_reconnects > 0) { _reconnects -= 1; if (_nextAttempt.IsOverdue) @@ -598,79 +672,54 @@ public override void ExceptionCaught(IChannelHandlerContext context, Exception e { _scheduler.Advanced.ScheduleOnce(_nextAttempt.TimeLeft, Reconnect); } - return; } - _fsm.Tell(new ClientFSM.ConnectionFailure(FormatConnectionFailure(context, exception))); - } + }, TaskContinuationOptions.NotOnRanToCompletion); + } - private void Reconnect() - { - _log.Debug("Connecting..."); - _nextAttempt = Deadline.Now + _backoff; - RemoteConnection.CreateConnection(Role.Client, _server, _poolSize, this).ContinueWith(_ => - { - _log.Debug("Failed to connect.... Retrying again in {0}s. {1} attempts left.", _nextAttempt.TimeLeft,_reconnects); - if (_reconnects > 0) - { - _reconnects -= 1; - if (_nextAttempt.IsOverdue) - { - Reconnect(); - } - else - { - _scheduler.Advanced.ScheduleOnce(_nextAttempt.TimeLeft, Reconnect); - } - } - }, TaskContinuationOptions.NotOnRanToCompletion); - } + public override void ChannelActive(IChannelHandlerContext context) + { + _log.Debug("connected to {0}", context.Channel.RemoteAddress); + _fsm.Tell(new ClientFSM.Connected(context.Channel)); + context.FireChannelActive(); + } - public override void ChannelActive(IChannelHandlerContext context) + public override void ChannelInactive(IChannelHandlerContext context) + { + if (!_loggedDisconnect) //added this to help mute log messages { - _log.Debug("connected to {0}", context.Channel.RemoteAddress); - _fsm.Tell(new ClientFSM.Connected(context.Channel)); - context.FireChannelActive(); + _loggedDisconnect = true; + _log.Debug("disconnected from {0}", context.Channel.RemoteAddress); + } + _fsm.Tell(PoisonPill.Instance); - public override void ChannelInactive(IChannelHandlerContext context) + // run outside of the Helios / DotNetty threadpool + Task.Factory.StartNew(() => { - if (!_loggedDisconnect) //added this to help mute log messages - { - _loggedDisconnect 
= true; - _log.Debug("disconnected from {0}", context.Channel.RemoteAddress); - - } - _fsm.Tell(PoisonPill.Instance); - - // run outside of the Helios / DotNetty threadpool - Task.Factory.StartNew(() => - { - RemoteConnection.Shutdown(context.Channel); + RemoteConnection.Shutdown(context.Channel); #pragma warning disable CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed - RemoteConnection.ReleaseAll(); // yep, let it run asynchronously. + RemoteConnection.ReleaseAll(); // yep, let it run asynchronously. #pragma warning restore CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed - }, CancellationToken.None, TaskCreationOptions.None, TaskScheduler.Default); - context.FireChannelInactive(); - } - - public override void ChannelRead(IChannelHandlerContext context, object message) - { - var channel = context.Channel; - _log.Debug("message from {0}, {1}", channel.RemoteAddress, message); - if (message is INetworkOp) - { - _fsm.Tell(message); - return; - } - _log.Info("server {0} sent garbage '{1}', disconnecting", channel.RemoteAddress, message); - channel.CloseAsync(); - } + }, CancellationToken.None, TaskCreationOptions.None, TaskScheduler.Default); + context.FireChannelInactive(); + } - public override Task CloseAsync(IChannelHandlerContext context) + public override void ChannelRead(IChannelHandlerContext context, object message) + { + var channel = context.Channel; + _log.Debug("message from {0}, {1}", channel.RemoteAddress, message); + if (message is INetworkOp) { - _log.Info("Client: disconnecting {0} from {1}", context.Channel.LocalAddress, context.Channel.RemoteAddress); - return base.CloseAsync(context); + _fsm.Tell(message); + return; } + _log.Info("server {0} sent garbage '{1}', disconnecting", channel.RemoteAddress, message); + channel.CloseAsync(); } -} + public override Task CloseAsync(IChannelHandlerContext context) + { + 
_log.Info("Client: disconnecting {0} from {1}", context.Channel.LocalAddress, context.Channel.RemoteAddress); + return base.CloseAsync(context); + } +} \ No newline at end of file diff --git a/src/core/Akka.Remote.Tests.MultiNode/RemoteNodeDeathWatchSpec.cs b/src/core/Akka.Remote.Tests.MultiNode/RemoteNodeDeathWatchSpec.cs index a41fda8820a..5e97d3a73b6 100644 --- a/src/core/Akka.Remote.Tests.MultiNode/RemoteNodeDeathWatchSpec.cs +++ b/src/core/Akka.Remote.Tests.MultiNode/RemoteNodeDeathWatchSpec.cs @@ -8,6 +8,7 @@ using System; using System.Linq; using System.Threading; +using System.Threading.Tasks; using Akka.Actor; using Akka.Configuration; using Akka.Event; @@ -17,547 +18,545 @@ using Akka.TestKit; using static Akka.Remote.Tests.MultiNode.RemoteNodeDeathWatchMultiNetSpec; -namespace Akka.Remote.Tests.MultiNode +namespace Akka.Remote.Tests.MultiNode; + +public class RemoteNodeDeathWatchMultiNetSpec : MultiNodeConfig { - public class RemoteNodeDeathWatchMultiNetSpec : MultiNodeConfig + public RemoteNodeDeathWatchMultiNetSpec() { - public RemoteNodeDeathWatchMultiNetSpec() - { - First = Role("first"); - Second = Role("second"); - Third = Role("third"); + First = Role("first"); + Second = Role("second"); + Third = Role("third"); - CommonConfig = DebugConfig(false).WithFallback(ConfigurationFactory.ParseString(@" + CommonConfig = DebugConfig(false).WithFallback(ConfigurationFactory.ParseString(@" akka.loglevel = INFO akka.remote.log-remote-lifecycle-events = off ## Use a tighter setting than the default, otherwise it takes 20s for DeathWatch to trigger akka.remote.watch-failure-detector.acceptable-heartbeat-pause = 3 s ")); - TestTransport = true; - } + TestTransport = true; + } - public RoleName First { get; } - public RoleName Second { get; } - public RoleName Third { get; } + public RoleName First { get; } + public RoleName Second { get; } + public RoleName Third { get; } - public sealed class WatchIt + public sealed class WatchIt + { + public WatchIt(IActorRef 
watchee) { - public WatchIt(IActorRef watchee) - { - Watchee = watchee; - } - - public IActorRef Watchee { get; } + Watchee = watchee; } - public sealed class UnwatchIt - { - public UnwatchIt(IActorRef watchee) - { - Watchee = watchee; - } + public IActorRef Watchee { get; } + } - public IActorRef Watchee { get; } + public sealed class UnwatchIt + { + public UnwatchIt(IActorRef watchee) + { + Watchee = watchee; } - public sealed class Ack - { - public static Ack Instance { get; } = new(); + public IActorRef Watchee { get; } + } - private Ack() - { - } - } + public sealed class Ack + { + public static Ack Instance { get; } = new(); - /// - /// Forwarding to non-watching testActor is not possible, - /// and therefore the message is wrapped. - /// - public sealed class WrappedTerminated + private Ack() { - public WrappedTerminated(Terminated t) - { - T = t; - } + } + } - public Terminated T { get; } + /// + /// Forwarding to non-watching testActor is not possible, + /// and therefore the message is wrapped. 
+ /// + public sealed class WrappedTerminated + { + public WrappedTerminated(Terminated t) + { + T = t; } - public class ProbeActor : ReceiveActor + public Terminated T { get; } + } + + public class ProbeActor : ReceiveActor + { + private readonly IActorRef _testActor; + + public ProbeActor(IActorRef testActor) { - private readonly IActorRef _testActor; + _testActor = testActor; - public ProbeActor(IActorRef testActor) + Receive(w => { - _testActor = testActor; - - Receive(w => - { - Context.Watch(w.Watchee); - Sender.Tell(Ack.Instance); - }); - Receive(w => - { - Context.Unwatch(w.Watchee); - Sender.Tell(Ack.Instance); - }); - Receive(t => _testActor.Forward(new WrappedTerminated(t))); - ReceiveAny(msg => _testActor.Forward(msg)); - } + Context.Watch(w.Watchee); + Sender.Tell(Ack.Instance); + }); + Receive(w => + { + Context.Unwatch(w.Watchee); + Sender.Tell(Ack.Instance); + }); + Receive(t => _testActor.Forward(new WrappedTerminated(t))); + ReceiveAny(msg => _testActor.Forward(msg)); } } +} + +public abstract class RemoteNodeDeathWatchSpec : MultiNodeSpec +{ + private readonly RemoteNodeDeathWatchMultiNetSpec _config; + private readonly Lazy _remoteWatcher; + private readonly Func _identify; - public abstract class RemoteNodeDeathWatchSpec : MultiNodeSpec + protected RemoteNodeDeathWatchSpec(Type type) : this(new RemoteNodeDeathWatchMultiNetSpec(), type) { - private readonly RemoteNodeDeathWatchMultiNetSpec _config; - private readonly Lazy _remoteWatcher; - private readonly Func _identify; + } - protected RemoteNodeDeathWatchSpec(Type type) : this(new RemoteNodeDeathWatchMultiNetSpec(), type) - { - } + protected RemoteNodeDeathWatchSpec(RemoteNodeDeathWatchMultiNetSpec config, Type type) : base(config, type) + { + _config = config; - protected RemoteNodeDeathWatchSpec(RemoteNodeDeathWatchMultiNetSpec config, Type type) : base(config, type) + _remoteWatcher = new Lazy(() => { - _config = config; + Sys.ActorSelection("/system/remote-watcher").Tell(new 
Identify(null)); + return ExpectMsg(TimeSpan.FromSeconds(10)).Subject; + }); - _remoteWatcher = new Lazy(() => - { - Sys.ActorSelection("/system/remote-watcher").Tell(new Identify(null)); - return ExpectMsg(TimeSpan.FromSeconds(10)).Subject; - }); + _identify = (role, actorName) => + { + Sys.ActorSelection(Node(role) / "user" / actorName).Tell(new Identify(actorName)); + return ExpectMsg(TimeSpan.FromSeconds(10)).Subject; + }; - _identify = (role, actorName) => - { - Sys.ActorSelection(Node(role) / "user" / actorName).Tell(new Identify(actorName)); - return ExpectMsg(TimeSpan.FromSeconds(10)).Subject; - }; + MuteDeadLetters(null, typeof(Heartbeat)); + } - MuteDeadLetters(null, typeof(Heartbeat)); - } + protected override int InitialParticipantsValueFactory => Roles.Count; - protected override int InitialParticipantsValueFactory => Roles.Count; + protected abstract string Scenario { get; } - protected abstract string Scenario { get; } + protected abstract Func SleepAsync { get; } - protected abstract Action Sleep { get; } + private async Task AssertCleanup(TimeSpan? timeout = null) + { + timeout ??= TimeSpan.FromSeconds(5); - private void AssertCleanup(TimeSpan? timeout = null) + await WithinAsync(timeout.Value, async () => { - timeout = timeout ?? 
TimeSpan.FromSeconds(5); - - Within(timeout.Value, () => + await AwaitAssertAsync(async () => { - AwaitAssert(() => - { - _remoteWatcher.Value.Tell(RemoteWatcher.Stats.Empty); - ExpectMsg(s => Equals(s, RemoteWatcher.Stats.Empty)); - }); + _remoteWatcher.Value.Tell(RemoteWatcher.Stats.Empty); + await ExpectMsgAsync(s => Equals(s, RemoteWatcher.Stats.Empty)); }); - } + }); + } - [MultiNodeFact] - public void RemoteNodeDeathWatchSpecs() - { - Console.WriteLine($"Executing with {Scenario} scenario"); - - RemoteNodeDeathWatch_must_receive_Terminated_when_remote_actor_is_stopped(); - RemoteNodeDeathWatch_must_cleanup_after_watch_unwatch(); - RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_unwatch(); - RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_stop_unwatch(); - RemoteNodeDeathWatch_must_cleanup_after_stop(); - RemoteNodeDeathWatch_must_receive_Terminated_when_watched_node_crash(); - RemoteNodeDeathWatch_must_cleanup_when_watching_node_crash(); - } + [MultiNodeFact] + public async Task RemoteNodeDeathWatchSpecs() + { + Console.WriteLine($"Executing with {Scenario} scenario"); + + await RemoteNodeDeathWatch_must_receive_Terminated_when_remote_actor_is_stoppedAsync(); + await RemoteNodeDeathWatch_must_cleanup_after_watch_unwatchAsync(); + await RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_unwatchAsync(); + await RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_stop_unwatchAsync(); + await RemoteNodeDeathWatch_must_cleanup_after_stopAsync(); + await RemoteNodeDeathWatch_must_receive_Terminated_when_watched_node_crashAsync(); + await RemoteNodeDeathWatch_must_cleanup_when_watching_node_crashAsync(); + } - private void RemoteNodeDeathWatch_must_receive_Terminated_when_remote_actor_is_stopped() + private async Task RemoteNodeDeathWatch_must_receive_Terminated_when_remote_actor_is_stoppedAsync() + { + await RunOnAsync(async () => { - RunOn(() => - { - var watcher = Sys.ActorOf(Props.Create(() => new 
ProbeActor(TestActor)), "watcher1"); - EnterBarrier("actors-started-1"); + var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher1"); + await EnterBarrierAsync("actors-started-1"); - var subject = _identify(_config.Second, "subject1"); - watcher.Tell(new WatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - subject.Tell("hello1"); - EnterBarrier("hello1-message-sent"); - EnterBarrier("watch-established-1"); + var subject = _identify(_config.Second, "subject1"); + watcher.Tell(new WatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + subject.Tell("hello1"); + await EnterBarrierAsync("hello1-message-sent"); + await EnterBarrierAsync("watch-established-1"); - Sleep(); - ExpectMsg().T.ActorRef.ShouldBe(subject); - }, _config.First); + await SleepAsync(); + (await ExpectMsgAsync()).T.ActorRef.ShouldBe(subject); + }, _config.First); - RunOn(() => - { - var subject = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject1"); - EnterBarrier("actors-started-1"); + await RunOnAsync(async () => + { + var subject = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject1"); + await EnterBarrierAsync("actors-started-1"); - EnterBarrier("hello1-message-sent"); - ExpectMsg("hello1", TimeSpan.FromSeconds(3)); - EnterBarrier("watch-established-1"); + await EnterBarrierAsync("hello1-message-sent"); + await ExpectMsgAsync("hello1", TimeSpan.FromSeconds(3)); + await EnterBarrierAsync("watch-established-1"); - Sleep(); - Sys.Stop(subject); - }, _config.Second); + await SleepAsync(); + Sys.Stop(subject); + }, _config.Second); - RunOn(() => - { - EnterBarrier("actors-started-1"); - EnterBarrier("hello1-message-sent"); - EnterBarrier("watch-established-1"); - }, _config.Third); + await RunOnAsync(async () => + { + await EnterBarrierAsync("actors-started-1"); + await EnterBarrierAsync("hello1-message-sent"); + await EnterBarrierAsync("watch-established-1"); + }, _config.Third); - EnterBarrier("terminated-verified-1"); 
+ await EnterBarrierAsync("terminated-verified-1"); - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - AssertCleanup(); + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); - EnterBarrier("after-1"); - } + await EnterBarrierAsync("after-1"); + } - private void RemoteNodeDeathWatch_must_cleanup_after_watch_unwatch() + private async Task RemoteNodeDeathWatch_must_cleanup_after_watch_unwatchAsync() + { + await RunOnAsync(async () => { - RunOn(() => - { - var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher2"); - EnterBarrier("actors-started-2"); + var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher2"); + await EnterBarrierAsync("actors-started-2"); - var subject = _identify(_config.Second, "subject2"); - watcher.Tell(new WatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - EnterBarrier("watch-2"); + var subject = _identify(_config.Second, "subject2"); + watcher.Tell(new WatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await EnterBarrierAsync("watch-2"); - Sleep(); + await SleepAsync(); - watcher.Tell(new UnwatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - EnterBarrier("unwatch-2"); - }, _config.First); + watcher.Tell(new UnwatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await EnterBarrierAsync("unwatch-2"); + }, _config.First); - RunOn(() => Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject2"), _config.Second); + RunOn(() => Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject2"), _config.Second); - RunOn(() => - { - EnterBarrier("actors-started-2"); - EnterBarrier("watch-2"); - EnterBarrier("unwatch-2"); - }, _config.Second, _config.Third); + await RunOnAsync(async () => + { + await EnterBarrierAsync("actors-started-2"); + await 
EnterBarrierAsync("watch-2"); + await EnterBarrierAsync("unwatch-2"); + }, _config.Second, _config.Third); - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - AssertCleanup(); + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); - EnterBarrier("after-2"); - } + await EnterBarrierAsync("after-2"); + } - private void RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_unwatch() + private async Task RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_unwatchAsync() + { + await RunOnAsync(async () => { - RunOn(() => - { - var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher3"); - Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject3"); - EnterBarrier("actors-started-3"); + var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher3"); + Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject3"); + await EnterBarrierAsync("actors-started-3"); - var other = Myself == _config.First ? _config.Second : _config.First; - var subject = _identify(other, "subject3"); - watcher.Tell(new WatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - EnterBarrier("watch-3"); + var other = Myself == _config.First ? 
_config.Second : _config.First; + var subject = _identify(other, "subject3"); + watcher.Tell(new WatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await EnterBarrierAsync("watch-3"); - Sleep(); + await SleepAsync(); - watcher.Tell(new UnwatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - EnterBarrier("unwatch-3"); - }, _config.First, _config.Second); + watcher.Tell(new UnwatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await EnterBarrierAsync("unwatch-3"); + }, _config.First, _config.Second); - RunOn(() => - { - EnterBarrier("actors-started-3"); - EnterBarrier("watch-3"); - EnterBarrier("unwatch-3"); - }, _config.Third); + await RunOnAsync(async () => + { + await EnterBarrierAsync("actors-started-3"); + await EnterBarrierAsync("watch-3"); + await EnterBarrierAsync("unwatch-3"); + }, _config.Third); - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - AssertCleanup(); + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); - EnterBarrier("after-3"); - } + await EnterBarrierAsync("after-3"); + } - private void RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_stop_unwatch() + private async Task RemoteNodeDeathWatch_must_cleanup_after_bi_directional_watch_stop_unwatchAsync() + { + await RunOnAsync(async () => { - RunOn(() => - { - var watcher1 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "w1"); - var watcher2 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "w2"); - var s1 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "s1"); - var s2 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "s2"); - EnterBarrier("actors-started-4"); - - var other = Myself == _config.First ? 
_config.Second : _config.First; - var subject1 = _identify(other, "s1"); - var subject2 = _identify(other, "s2"); - watcher1.Tell(new WatchIt(subject1)); - ExpectMsg(TimeSpan.FromSeconds(1)); - watcher2.Tell(new WatchIt(subject2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - EnterBarrier("watch-4"); - - Sleep(); - - watcher1.Tell(new UnwatchIt(subject1)); - ExpectMsg(TimeSpan.FromSeconds(1)); - EnterBarrier("unwatch-s1-4"); - Sys.Stop(s1); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - EnterBarrier("stop-s1-4"); - - Sys.Stop(s2); - EnterBarrier("stop-s2-4"); - ExpectMsg().T.ActorRef.ShouldBe(subject2); - }, _config.First, _config.Second); - - RunOn(() => - { - EnterBarrier("actors-started-4"); - EnterBarrier("watch-4"); - EnterBarrier("unwatch-s1-4"); - EnterBarrier("stop-s1-4"); - EnterBarrier("stop-s2-4"); - }, _config.Third); + var watcher1 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "w1"); + var watcher2 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "w2"); + var s1 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "s1"); + var s2 = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "s2"); + await EnterBarrierAsync("actors-started-4"); + + var other = Myself == _config.First ? 
_config.Second : _config.First; + var subject1 = _identify(other, "s1"); + var subject2 = _identify(other, "s2"); + watcher1.Tell(new WatchIt(subject1)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + watcher2.Tell(new WatchIt(subject2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await EnterBarrierAsync("watch-4"); + + await SleepAsync(); + + watcher1.Tell(new UnwatchIt(subject1)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await EnterBarrierAsync("unwatch-s1-4"); + Sys.Stop(s1); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await EnterBarrierAsync("stop-s1-4"); + + Sys.Stop(s2); + await EnterBarrierAsync("stop-s2-4"); + (await ExpectMsgAsync()).T.ActorRef.ShouldBe(subject2); + }, _config.First, _config.Second); + + await RunOnAsync(async () => + { + await EnterBarrierAsync("actors-started-4"); + await EnterBarrierAsync("watch-4"); + await EnterBarrierAsync("unwatch-s1-4"); + await EnterBarrierAsync("stop-s1-4"); + await EnterBarrierAsync("stop-s2-4"); + }, _config.Third); + + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); + + await EnterBarrierAsync("after-4"); + } - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - AssertCleanup(); + private async Task RemoteNodeDeathWatch_must_cleanup_after_stopAsync() + { + await RunOnAsync(async () => + { + var p1 = CreateTestProbe(); + var p2 = CreateTestProbe(); + var p3 = CreateTestProbe(); + var a1 = Sys.ActorOf(Props.Create(() => new ProbeActor(p1.Ref)), "a1"); + var a2 = Sys.ActorOf(Props.Create(() => new ProbeActor(p2.Ref)), "a2"); + var a3 = Sys.ActorOf(Props.Create(() => new ProbeActor(p3.Ref)), "a3"); + + await EnterBarrierAsync("actors-started-5"); + + var b1 = _identify(_config.Second, "b1"); + var b2 = _identify(_config.Second, "b2"); + var b3 = _identify(_config.Second, "b3"); + + a1.Tell(new 
WatchIt(b1)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + a1.Tell(new WatchIt(b2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + a2.Tell(new WatchIt(b2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + a3.Tell(new WatchIt(b3)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await SleepAsync(); + a2.Tell(new UnwatchIt(b2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + + await EnterBarrierAsync("watch-established-5"); + + await SleepAsync(); + + a1.Tell(PoisonPill.Instance); + a2.Tell(PoisonPill.Instance); + a3.Tell(PoisonPill.Instance); + + await EnterBarrierAsync("stopped-5"); + await EnterBarrierAsync("terminated-verified-5"); - EnterBarrier("after-4"); - } + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); + }, _config.First); - private void RemoteNodeDeathWatch_must_cleanup_after_stop() + await RunOnAsync(async () => { - RunOn(() => - { - var p1 = CreateTestProbe(); - var p2 = CreateTestProbe(); - var p3 = CreateTestProbe(); - var a1 = Sys.ActorOf(Props.Create(() => new ProbeActor(p1.Ref)), "a1"); - var a2 = Sys.ActorOf(Props.Create(() => new ProbeActor(p2.Ref)), "a2"); - var a3 = Sys.ActorOf(Props.Create(() => new ProbeActor(p3.Ref)), "a3"); - - EnterBarrier("actors-started-5"); - - var b1 = _identify(_config.Second, "b1"); - var b2 = _identify(_config.Second, "b2"); - var b3 = _identify(_config.Second, "b3"); - - a1.Tell(new WatchIt(b1)); - ExpectMsg(TimeSpan.FromSeconds(1)); - a1.Tell(new WatchIt(b2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - a2.Tell(new WatchIt(b2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - a3.Tell(new WatchIt(b3)); - ExpectMsg(TimeSpan.FromSeconds(1)); - Sleep(); - a2.Tell(new UnwatchIt(b2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - - EnterBarrier("watch-established-5"); - - Sleep(); - - a1.Tell(PoisonPill.Instance); - a2.Tell(PoisonPill.Instance); - a3.Tell(PoisonPill.Instance); - - 
EnterBarrier("stopped-5"); - EnterBarrier("terminated-verified-5"); - - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - AssertCleanup(); - }, _config.First); - - RunOn(() => - { - var p1 = CreateTestProbe(); - var p2 = CreateTestProbe(); - var p3 = CreateTestProbe(); - var b1 = Sys.ActorOf(Props.Create(() => new ProbeActor(p1.Ref)), "b1"); - var b2 = Sys.ActorOf(Props.Create(() => new ProbeActor(p2.Ref)), "b2"); - var b3 = Sys.ActorOf(Props.Create(() => new ProbeActor(p3.Ref)), "b3"); - - EnterBarrier("actors-started-5"); - - var a1 = _identify(_config.First, "a1"); - var a2 = _identify(_config.First, "a2"); - var a3 = _identify(_config.First, "a3"); - - b1.Tell(new WatchIt(a1)); - ExpectMsg(TimeSpan.FromSeconds(1)); - b1.Tell(new WatchIt(a2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - b2.Tell(new WatchIt(a2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - b3.Tell(new WatchIt(a3)); - ExpectMsg(TimeSpan.FromSeconds(1)); - Sleep(); - b2.Tell(new UnwatchIt(a2)); - ExpectMsg(TimeSpan.FromSeconds(1)); - - EnterBarrier("watch-established-5"); - EnterBarrier("stopped-5"); - - p1.ReceiveN(2, TimeSpan.FromSeconds(20)) - .Cast() - .Select(w => w.T.ActorRef) - .OrderBy(r => r.Path.Name) - .ShouldBe(new[] {a1, a2}); - p3.ExpectMsg(TimeSpan.FromSeconds(5)).T.ActorRef.ShouldBe(a3); - p2.ExpectNoMsg(TimeSpan.FromSeconds(2)); - EnterBarrier("terminated-verified-5"); + var p1 = CreateTestProbe(); + var p2 = CreateTestProbe(); + var p3 = CreateTestProbe(); + var b1 = Sys.ActorOf(Props.Create(() => new ProbeActor(p1.Ref)), "b1"); + var b2 = Sys.ActorOf(Props.Create(() => new ProbeActor(p2.Ref)), "b2"); + var b3 = Sys.ActorOf(Props.Create(() => new ProbeActor(p3.Ref)), "b3"); + + await EnterBarrierAsync("actors-started-5"); + + var a1 = _identify(_config.First, "a1"); + var a2 = _identify(_config.First, "a2"); + var a3 = _identify(_config.First, "a3"); + + b1.Tell(new WatchIt(a1)); + await 
ExpectMsgAsync(TimeSpan.FromSeconds(1)); + b1.Tell(new WatchIt(a2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + b2.Tell(new WatchIt(a2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + b3.Tell(new WatchIt(a3)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + await SleepAsync(); + b2.Tell(new UnwatchIt(a2)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + + await EnterBarrierAsync("watch-established-5"); + await EnterBarrierAsync("stopped-5"); + + p1.ReceiveN(2, TimeSpan.FromSeconds(20)) + .Cast() + .Select(w => w.T.ActorRef) + .OrderBy(r => r.Path.Name) + .ShouldBe([a1, a2]); + (await p3.ExpectMsgAsync(TimeSpan.FromSeconds(5))).T.ActorRef.ShouldBe(a3); + await p2.ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await EnterBarrierAsync("terminated-verified-5"); - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - p1.ExpectNoMsg(100); - p2.ExpectNoMsg(100); - p3.ExpectNoMsg(100); - AssertCleanup(); - }, _config.Second); - - RunOn(() => - { - EnterBarrier("actors-started-5"); - EnterBarrier("watch-established-5"); - EnterBarrier("stopped-5"); - EnterBarrier("terminated-verified-5"); - }, _config.Third); + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await p1.ExpectNoMsgAsync(100); + await p2.ExpectNoMsgAsync(100); + await p3.ExpectNoMsgAsync(100); + await AssertCleanup(); + }, _config.Second); + + await RunOnAsync(async () => + { + await EnterBarrierAsync("actors-started-5"); + await EnterBarrierAsync("watch-established-5"); + await EnterBarrierAsync("stopped-5"); + await EnterBarrierAsync("terminated-verified-5"); + }, _config.Third); - EnterBarrier("after-5"); - } + await EnterBarrierAsync("after-5"); + } - private void RemoteNodeDeathWatch_must_receive_Terminated_when_watched_node_crash() + private async Task 
RemoteNodeDeathWatch_must_receive_Terminated_when_watched_node_crashAsync() + { + await RunOnAsync(async () => { - RunOn(() => - { - var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher6"); - var watcher2 = Sys.ActorOf(Props.Create(() => new ProbeActor(Sys.DeadLetters))); - EnterBarrier("actors-started-6"); + var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher6"); + var watcher2 = Sys.ActorOf(Props.Create(() => new ProbeActor(Sys.DeadLetters))); + await EnterBarrierAsync("actors-started-6"); - var subject = _identify(_config.Second, "subject6"); - watcher.Tell(new WatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - watcher2.Tell(new WatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - subject.Tell("hello6"); + var subject = _identify(_config.Second, "subject6"); + watcher.Tell(new WatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + watcher2.Tell(new WatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + subject.Tell("hello6"); - // testing with this watch/unwatch of watcher2 to make sure that the unwatch doesn't - // remove the first watch - watcher2.Tell(new UnwatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); + // testing with this watch/unwatch of watcher2 to make sure that the unwatch doesn't + // remove the first watch + watcher2.Tell(new UnwatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); - EnterBarrier("watch-established-6"); + await EnterBarrierAsync("watch-established-6"); - Sleep(); + await SleepAsync(); - Log.Info("exit second"); - TestConductor.Exit(_config.Second, 0).Wait(); - ExpectMsg(TimeSpan.FromSeconds(15)).T.ActorRef.ShouldBe(subject); + Log.Info("exit second"); + await TestConductor.ExitAsync(_config.Second, 0); + (await ExpectMsgAsync(TimeSpan.FromSeconds(15))).T.ActorRef.ShouldBe(subject); - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - 
AssertCleanup(); - }, _config.First); + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); + }, _config.First); - RunOn(() => - { - Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject6"); - EnterBarrier("actors-started-6"); + await RunOnAsync(async () => + { + Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject6"); + await EnterBarrierAsync("actors-started-6"); - ExpectMsg("hello6", TimeSpan.FromSeconds(3)); - EnterBarrier("watch-established-6"); - }, _config.Second); + await ExpectMsgAsync("hello6", TimeSpan.FromSeconds(3)); + await EnterBarrierAsync("watch-established-6"); + }, _config.Second); - RunOn(() => - { - EnterBarrier("actors-started-6"); - EnterBarrier("watch-established-6"); - }, _config.Third); + await RunOnAsync(async () => + { + await EnterBarrierAsync("actors-started-6"); + await EnterBarrierAsync("watch-established-6"); + }, _config.Third); - EnterBarrier("after-6"); - } + await EnterBarrierAsync("after-6"); + } - private void RemoteNodeDeathWatch_must_cleanup_when_watching_node_crash() + private async Task RemoteNodeDeathWatch_must_cleanup_when_watching_node_crashAsync() + { + await RunOnAsync(async () => { - RunOn(() => - { - var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher7"); - EnterBarrier("actors-started-7"); + var watcher = Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "watcher7"); + await EnterBarrierAsync("actors-started-7"); - var subject = _identify(_config.First, "subject7"); - watcher.Tell(new WatchIt(subject)); - ExpectMsg(TimeSpan.FromSeconds(1)); - subject.Tell("hello7"); - EnterBarrier("watch-established-7"); - }, _config.Third); + var subject = _identify(_config.First, "subject7"); + watcher.Tell(new WatchIt(subject)); + await ExpectMsgAsync(TimeSpan.FromSeconds(1)); + subject.Tell("hello7"); + await 
EnterBarrierAsync("watch-established-7"); + }, _config.Third); - RunOn(() => - { - Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject7"); - EnterBarrier("actors-started-7"); + await RunOnAsync(async () => + { + Sys.ActorOf(Props.Create(() => new ProbeActor(TestActor)), "subject7"); + await EnterBarrierAsync("actors-started-7"); - ExpectMsg("hello7", TimeSpan.FromSeconds(3)); - EnterBarrier("watch-established-7"); + await ExpectMsgAsync("hello7", TimeSpan.FromSeconds(3)); + await EnterBarrierAsync("watch-established-7"); - Sleep(); + await SleepAsync(); - Log.Info("exit third"); - TestConductor.Exit(_config.Third, 0).Wait(); + Log.Info("exit third"); + await TestConductor.ExitAsync(_config.Third, 0); - // verify that things are cleaned up, and heartbeating is stopped - AssertCleanup(TimeSpan.FromSeconds(20)); - ExpectNoMsg(TimeSpan.FromSeconds(2)); - AssertCleanup(); - }, _config.First); + // verify that things are cleaned up, and heartbeating is stopped + await AssertCleanup(TimeSpan.FromSeconds(20)); + await ExpectNoMsgAsync(TimeSpan.FromSeconds(2)); + await AssertCleanup(); + }, _config.First); - EnterBarrier("after-7"); - } + await EnterBarrierAsync("after-7"); } +} - #region Several different variations of the test - - public class RemoteNodeDeathWatchFastSpec : RemoteNodeDeathWatchSpec - { - public RemoteNodeDeathWatchFastSpec() : base(typeof(RemoteNodeDeathWatchFastSpec)) - { } - - protected override string Scenario { get; } = "fast"; +#region Several different variations of the test - protected override Action Sleep { get; } = () => Thread.Sleep(100); - } +public class RemoteNodeDeathWatchFastSpec : RemoteNodeDeathWatchSpec +{ + public RemoteNodeDeathWatchFastSpec() : base(typeof(RemoteNodeDeathWatchFastSpec)) + { } - public class RemoteNodeDeathWatchSlowSpec : RemoteNodeDeathWatchSpec - { - public RemoteNodeDeathWatchSlowSpec() : base(typeof(RemoteNodeDeathWatchSlowSpec)) - { } + protected override string Scenario { get; } = "fast"; - 
protected override string Scenario { get; } = "slow"; + protected override Func SleepAsync { get; } = async () => await Task.Delay(100); +} - protected override Action Sleep { get; } = () => Thread.Sleep(3000); - } +public class RemoteNodeDeathWatchSlowSpec : RemoteNodeDeathWatchSpec +{ + public RemoteNodeDeathWatchSlowSpec() : base(typeof(RemoteNodeDeathWatchSlowSpec)) + { } - #endregion + protected override string Scenario { get; } = "slow"; + protected override Func SleepAsync { get; } = async () => await Task.Delay(3000); } + +#endregion \ No newline at end of file diff --git a/src/core/Akka.Remote.Tests.MultiNode/TestConductor/TestConductorSpec.cs b/src/core/Akka.Remote.Tests.MultiNode/TestConductor/TestConductorSpec.cs index f2c9163f7c9..5dacaf87e88 100644 --- a/src/core/Akka.Remote.Tests.MultiNode/TestConductor/TestConductorSpec.cs +++ b/src/core/Akka.Remote.Tests.MultiNode/TestConductor/TestConductorSpec.cs @@ -39,7 +39,7 @@ public TestConductorSpecConfig() public class TestConductorSpec : MultiNodeSpec { - private TestConductorSpecConfig _config; + private readonly TestConductorSpecConfig _config; public TestConductorSpec() : this(new TestConductorSpecConfig()) { } @@ -48,34 +48,28 @@ protected TestConductorSpec(TestConductorSpecConfig config) : base(config, typeo _config = config; } - protected override int InitialParticipantsValueFactory - { - get - { - return 2; - } - } + protected override int InitialParticipantsValueFactory => 2; private IActorRef _echo; - protected IActorRef GetEchoActorRef() + protected async Task GetEchoActorRef() { if (_echo == null) { Sys.ActorSelection(Node(_config.Master).Root / "user" / "echo").Tell(new Identify(null)); - _echo = ExpectMsg().Subject; + _echo = (await ExpectMsgAsync()).Subject; } return _echo; } [MultiNodeFact] - public void ATestConductorMust() + public async Task ATestConductorMust() { - Enter_a_Barrier(); - Support_Throttling_of_Network_Connections(); + await Enter_a_BarrierAsync(); + await 
Support_Throttling_of_Network_ConnectionsAsync(); } - public void Enter_a_Barrier() + public async Task Enter_a_BarrierAsync() { RunOn(() => { @@ -86,55 +80,55 @@ public void Enter_a_Barrier() }), "echo"); }, _config.Master); - EnterBarrier("name"); + await EnterBarrierAsync("name"); } - public void Support_Throttling_of_Network_Connections() + public async Task Support_Throttling_of_Network_ConnectionsAsync() { - RunOn(() => + await RunOnAsync(async () => { // start remote network connection so that it can be throttled - GetEchoActorRef().Tell("start"); + (await GetEchoActorRef()).Tell("start"); }, _config.Slave); - ExpectMsg("start"); + await ExpectMsgAsync("start"); - RunOn(() => + await RunOnAsync(async () => { - TestConductor.Throttle(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Send, 0.01f).Wait(); + await TestConductor.ThrottleAsync(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Send, 0.01f); }, _config.Master); - EnterBarrier("throttled_send"); + await EnterBarrierAsync("throttled_send"); - RunOn(() => + await RunOnAsync(async () => { foreach(var i in Enumerable.Range(0, 10)) { - GetEchoActorRef().Tell(i); + (await GetEchoActorRef()).Tell(i); } }, _config.Slave); // fudged the value to 0.5,since messages are a different size in Akka.NET - Within(TimeSpan.FromSeconds(0.5), TimeSpan.FromSeconds(2), () => + await WithinAsync(TimeSpan.FromSeconds(0.5), TimeSpan.FromSeconds(2), async () => { - ExpectMsg(0, TimeSpan.FromMilliseconds(500)); - ReceiveN(9).ShouldOnlyContainInOrder(Enumerable.Range(1,9).Cast().ToArray()); + await ExpectMsgAsync(0, TimeSpan.FromMilliseconds(500)); + (await ReceiveNAsync(9).ToListAsync()).ShouldOnlyContainInOrder(Enumerable.Range(1,9).Cast().ToArray()); }); - EnterBarrier("throttled_send2"); - RunOn(() => + await EnterBarrierAsync("throttled_send2"); + await RunOnAsync(async () => { - TestConductor.Throttle(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Send, -1).Wait(); - 
TestConductor.Throttle(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Receive, 0.01F).Wait(); + await TestConductor.ThrottleAsync(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Send, -1); + await TestConductor.ThrottleAsync(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Receive, 0.01F); }, _config.Master); - EnterBarrier("throttled_recv"); + await EnterBarrierAsync("throttled_recv"); - RunOn(() => + await RunOnAsync(async () => { foreach (var i in Enumerable.Range(10, 10)) { - GetEchoActorRef().Tell(i); + (await GetEchoActorRef()).Tell(i); } }, _config.Slave); @@ -142,20 +136,20 @@ public void Support_Throttling_of_Network_Connections() ? (TimeSpan.Zero, TimeSpan.FromMilliseconds(500)) : (TimeSpan.FromSeconds(0.3), TimeSpan.FromSeconds(3)); - Within(minMax.Item1, minMax.Item2, () => + await WithinAsync(minMax.Item1, minMax.Item2, async () => { - ExpectMsg(10, TimeSpan.FromMilliseconds(500)); - ReceiveN(9).ShouldOnlyContainInOrder(Enumerable.Range(11, 9).Cast().ToArray()); + await ExpectMsgAsync(10, TimeSpan.FromMilliseconds(500)); + (await ReceiveNAsync(9).ToListAsync()).ShouldOnlyContainInOrder(Enumerable.Range(11, 9).Cast().ToArray()); }); - EnterBarrier("throttled_recv2"); + await EnterBarrierAsync("throttled_recv2"); - RunOn(() => + await RunOnAsync(async () => { - TestConductor.Throttle(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Receive, -1).Wait(); + await TestConductor.ThrottleAsync(_config.Slave, _config.Master, ThrottleTransportAdapter.Direction.Receive, -1); }, _config.Master); - EnterBarrier("after"); + await EnterBarrierAsync("after"); } } }