-
Notifications
You must be signed in to change notification settings - Fork 1.1k
fix: report Tcp.CommandFailed when a scheduled connect retry throws (#8195) #8214
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,8 +20,10 @@ namespace Akka.IO | |
| /// An actor handling the connection state machine for an outgoing connection | ||
| /// to be established. | ||
| /// </summary> | ||
| internal sealed class TcpOutgoingConnection : TcpConnection | ||
| internal sealed class TcpOutgoingConnection : TcpConnection, IWithTimers | ||
| { | ||
| private const string RetryConnectTimerKey = "retry-connect"; | ||
|
|
||
| private readonly IActorRef _commander; | ||
| private readonly Tcp.Connect _connect; | ||
|
|
||
|
|
@@ -30,6 +32,19 @@ internal sealed class TcpOutgoingConnection : TcpConnection | |
| private readonly ConnectException _finishConnectNeverReturnedTrueException = | ||
| new("Could not establish connection because finishConnect never returned true"); | ||
|
|
||
| public ITimerScheduler Timers { get; set; } | ||
|
|
||
| /// <summary> | ||
| /// Internal trigger used to re-attempt an outgoing connection from inside the | ||
| /// actor's own message loop, so connect failures are reported as <see cref="Tcp.CommandFailed"/> | ||
| /// instead of being swallowed by the scheduler thread. | ||
| /// </summary> | ||
| private sealed class RetryConnect | ||
| { | ||
| public static readonly RetryConnect Instance = new(); | ||
| private RetryConnect() { } | ||
| } | ||
|
|
||
| public TcpOutgoingConnection(TcpExt tcp, IActorRef commander, Tcp.Connect connect) | ||
| : base( | ||
| (connect.TcpSettings ?? tcp.Settings), | ||
|
|
@@ -210,25 +225,15 @@ private void Connecting(int remainingFinishConnectRetries, SocketAsyncEventArgs | |
| // used only when we've resolved a DNS endpoint. | ||
| case > 0 when fallbackAddress != null: | ||
| { | ||
| var self = Self; | ||
| var previousAddress = (IPEndPoint)args.RemoteEndPoint; | ||
| args.RemoteEndPoint = fallbackAddress; | ||
| Context.System.Scheduler.Advanced.ScheduleOnce(TimeSpan.FromMilliseconds(1), () => | ||
| { | ||
| if (!Socket.ConnectAsync(args)) | ||
| self.Tell(IO.Tcp.SocketConnected.Instance); | ||
| }); | ||
| ScheduleConnectRetry(); | ||
| Become(() => Connecting(remainingFinishConnectRetries - 1, args, previousAddress)); | ||
| break; | ||
| } | ||
| case > 0: | ||
| { | ||
| var self = Self; | ||
| Context.System.Scheduler.Advanced.ScheduleOnce(TimeSpan.FromMilliseconds(1), () => | ||
| { | ||
| if (!Socket.ConnectAsync(args)) | ||
| self.Tell(IO.Tcp.SocketConnected.Instance); | ||
| }); | ||
| ScheduleConnectRetry(); | ||
| Become(() => Connecting(remainingFinishConnectRetries - 1, args, null)); | ||
| break; | ||
| } | ||
|
|
@@ -239,13 +244,28 @@ private void Connecting(int remainingFinishConnectRetries, SocketAsyncEventArgs | |
| break; | ||
| } | ||
| }); | ||
| Receive<RetryConnect>(_ => | ||
| { | ||
| // Re-attempt the connection from within the actor's message loop so that any | ||
| // exception (e.g. PlatformNotSupportedException on Linux when reusing a socket | ||
| // after a failed connect attempt) is caught by ReportConnectFailure and reported | ||
| // to the commander as Tcp.CommandFailed instead of being swallowed by the scheduler. | ||
| ReportConnectFailure(() => | ||
| { | ||
| if (!Socket.ConnectAsync(args)) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. somewhat confusing: does not return a |
||
| Self.Tell(IO.Tcp.SocketConnected.Instance); | ||
| }); | ||
| }); | ||
| Receive<ReceiveTimeout>(_ => | ||
| { | ||
| if (_connect.Timeout.HasValue) Context.SetReceiveTimeout(null); // Clear the timeout | ||
| Log.Debug("Connect timeout expired, could not establish connection to [{0}]", _connect.RemoteAddress); | ||
| Stop(new ConnectException($"Connect timeout of {_connect.Timeout} expired")); | ||
| }); | ||
| } | ||
|
|
||
| private void ScheduleConnectRetry() | ||
| => Timers.StartSingleTimer(RetryConnectTimerKey, RetryConnect.Instance, TimeSpan.FromMilliseconds(1)); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. preserves the same timing mechanics as before, just via messaging rather than delegates. |
||
| } | ||
|
|
||
| [InternalApi] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah so this was the real bug - having a lambda running outside the actor attempting the socket reconnect. We probably should have never designed it that way. Refactoring it to use an in-actor scheduled message instead.