Skip to content

Commit 31e4247

Browse files
authored
Make remote call errors configurable (#1020)
* Make remote call errors configurable Resolves #932 * Hide enum, use OID * Add test for allowed but unregistered error type * Update docs * fix docc syntax
1 parent a475135 commit 31e4247

File tree

6 files changed

+465
-259
lines changed

6 files changed

+465
-259
lines changed

Sources/DistributedActors/ClusterSystem.swift

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,7 +1207,7 @@ extension ClusterSystem {
12071207
{
12081208
let callID = UUID()
12091209

1210-
let timeout = RemoteCall.timeout ?? self.settings.defaultRemoteCallTimeout
1210+
let timeout = RemoteCall.timeout ?? self.settings.remoteCall.defaultTimeout
12111211
let timeoutTask: Task<Void, Error> = Task.detached {
12121212
try await Task.sleep(nanoseconds: UInt64(timeout.nanoseconds))
12131213
guard !Task.isCancelled else {
@@ -1491,11 +1491,21 @@ public struct ClusterInvocationResultHandler: DistributedTargetInvocationResultH
14911491

14921492
case .remoteCall(let system, let callID, let channel, let recipient):
14931493
system.log.debug("Result handler, onThrow: \(error)", metadata: ["call/id": "\(callID)"])
1494+
1495+
let errorType = type(of: error as Any)
14941496
let reply: RemoteCallReply<_Done>
1497+
14951498
if let codableError = error as? (Error & Codable) {
1496-
reply = .init(callID: callID, error: codableError)
1499+
switch system.settings.remoteCall.codableErrorAllowance.underlying {
1500+
case .custom(let allowedTypeOIDs) where allowedTypeOIDs.contains(ObjectIdentifier(errorType)):
1501+
reply = .init(callID: callID, error: codableError)
1502+
case .all: // compiler gets confused if this is grouped together with above
1503+
reply = .init(callID: callID, error: codableError)
1504+
default:
1505+
reply = .init(callID: callID, error: GenericRemoteCallError(errorType: errorType))
1506+
}
14971507
} else {
1498-
reply = .init(callID: callID, error: GenericRemoteCallError(message: "Remote call error of [\(type(of: error as Any))] type occurred"))
1508+
reply = .init(callID: callID, error: GenericRemoteCallError(errorType: errorType))
14991509
}
15001510
try await channel.writeAndFlush(TransportEnvelope(envelope: Payload(payload: .message(reply)), recipient: recipient))
15011511
}
@@ -1585,6 +1595,14 @@ struct RemoteCallReply<Value: Codable>: AnyRemoteCallReply {
15851595

15861596
public struct GenericRemoteCallError: Error, Codable {
15871597
public let message: String
1598+
1599+
init(message: String) {
1600+
self.message = message
1601+
}
1602+
1603+
init(errorType: Any.Type) {
1604+
self.message = "Remote call error of [\(errorType)] type occurred"
1605+
}
15881606
}
15891607

15901608
public enum ClusterSystemError: DistributedActorSystemError {

Sources/DistributedActors/ClusterSystemSettings.swift

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,8 @@ public struct ClusterSystemSettings {
167167
// ==== ------------------------------------------------------------------------------------------------------------
168168
// MARK: Distributed Actor Calls
169169

170-
/// If no other timeout is specified, this timeout is applied to every distributed call.
171-
/// A "distributed call" is any function call of a distributed function on a 'remote' distributed actor.
172-
///
173-
/// Set to `.effectivelyInfinite` to avoid setting a timeout, although this is not recommended.
174-
public var defaultRemoteCallTimeout: Duration = .seconds(5)
170+
/// Settings for remote calls. A "remote call" is any function call of a `distributed` function on a 'remote' distributed actor.
171+
public var remoteCall: RemoteCallSettings = .default
175172

176173
// ==== ------------------------------------------------------------------------------------------------------------
177174
// MARK: TLS & Security settings
@@ -449,3 +446,48 @@ public struct ServiceDiscoverySettings {
449446
case dynamic(AnyServiceDiscovery)
450447
}
451448
}
449+
450+
// ==== ----------------------------------------------------------------------------------------------------------------
451+
// MARK: Remote Call Settings
452+
453+
extension ClusterSystemSettings {
454+
public struct RemoteCallSettings {
455+
public static var `default`: RemoteCallSettings {
456+
.init()
457+
}
458+
459+
/// If no other timeout is specified, this timeout is applied to every distributed call.
460+
///
461+
/// Set to `.effectivelyInfinite` to avoid setting a timeout, although this is not recommended.
462+
public var defaultTimeout: Duration = .seconds(5)
463+
464+
public var codableErrorAllowance: CodableErrorAllowanceSettings = .all
465+
466+
public struct CodableErrorAllowanceSettings {
467+
internal enum CodableErrorAllowance {
468+
case none
469+
case all
470+
// OIDs of allowed types
471+
case custom(Set<ObjectIdentifier>)
472+
}
473+
474+
internal let underlying: CodableErrorAllowance
475+
476+
internal init(allowance: CodableErrorAllowance) {
477+
self.underlying = allowance
478+
}
479+
480+
/// All ``Codable`` errors will be converted to ``GenericRemoteCallError``.
481+
public static let none: CodableErrorAllowanceSettings = .init(allowance: .none)
482+
483+
/// All ``Codable`` errors will be returned as-is.
484+
public static let all: CodableErrorAllowanceSettings = .init(allowance: .all)
485+
486+
/// Only the indicated ``Codable`` errors are allowed. Others are converted to ``GenericRemoteCallError``.
487+
public static func custom(allowedTypes: [(Error & Codable).Type]) -> CodableErrorAllowanceSettings {
488+
let oids = allowedTypes.map { ObjectIdentifier($0) }
489+
return .init(allowance: .custom(Set(oids)))
490+
}
491+
}
492+
}
493+
}

Sources/DistributedActors/DistributedActors.docc/Clustering.md

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ Similarly, you can implement the [ServiceDiscovery](https://github.com/apple/swi
9898
and this will then enable the cluster to locate nodes to contact and join automatically. It also benefits all other uses of service discovery in such a new environment,
9999
so we encourage publishing your implementations if you're able to!
100100

101-
## Cluster events
101+
## Cluster Events
102102

103103
Cluster events are events emitted by the cluster as changes happen to the lifecycle of members of the cluster.
104104

@@ -273,19 +273,55 @@ distributed actor Boss: LifecycleWatch {
273273

274274
Remote calls are at the heart of what makes distributed actors actually distributed.
275275

276-
A call made on a remote distributed actor reference, will cross network boundaries, and therefore may way due to
276+
A call made on a remote distributed actor reference will cross network boundaries, and therefore may fail due to
277277
network issues, message loss, serialization errors, or other reasons such as the recipient node crashing as it
278278
processes the message. Even replies to remote calls could sometimes fail to be delivered, so you might need to
279279
design your distributed actors with idempotency (the resilience of a method being called more than once, e.g. due to a retry) in mind.
280280

281281
By default, to avoid "hanging" a remote caller forever on a suspended remote call as the recipient node fails to reply to it,
282-
for example because it (or the network itself), are currently unresponsive, remote calls have a default timeout configured,
283-
and if no reply is received within this duration, the call will fail with a ``RemoteCallError``.
282+
for example because it (or the network itself) is currently unresponsive, remote calls have a default timeout configured.
283+
If no reply is received within this duration, the call will fail with a ``RemoteCallError/timedOut``.
284284

285285
You can configure the default timeout used by the cluster system during its initialization:
286286

287287
```swift
288288
ClusterSystem() { settings in
289-
settings.
289+
settings.remoteCall.defaultTimeout = .seconds(3)
290+
}
291+
```
292+
293+
You can override the default timeout for a specific remote call:
294+
295+
```swift
296+
try await RemoteCall.with(timeout: .seconds(5)) {
297+
try await worker.work()
298+
}
299+
```
300+
301+
### Remote call errors
302+
303+
By default, if a remote call results in an error that is ``Codable``, the error is returned as-is. Non-``Codable`` errors are
304+
converted to ``GenericRemoteCallError``.
305+
306+
You may restrict which ``Codable`` errors get sent back to the caller through configuration:
307+
308+
```swift
309+
ClusterSystem() { settings in
310+
// By default, all ``Codable`` errors are allowed.
311+
settings.remoteCall.codableErrorAllowance = .all
312+
}
313+
```
314+
315+
```swift
316+
ClusterSystem() { settings in
317+
// Only specific types are allowed. All others are returned as ``GenericRemoteCallError``.
318+
settings.remoteCall.codableErrorAllowance = .custom(allowedTypes: [SomeCodableError.self, AnotherCodableError.self, ...])
319+
}
320+
```
321+
322+
```swift
323+
ClusterSystem() { settings in
324+
// All errors are returned as ``GenericRemoteCallError``.
325+
settings.remoteCall.codableErrorAllowance = .none
290326
}
291327
```

Sources/DistributedActors/Plugins/ClusterSingleton/ClusterSingletonSettings.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,10 @@ public struct ClusterSingletonSettings {
4141
/// Singleton node allocation strategies.
4242
public struct AllocationStrategySettings {
4343
private enum AllocationStrategy {
44-
/// Singletons will run on the cluster leader. *All* nodes are potential candidates.
4544
case byLeadership
4645
}
4746

48-
private var allocationStrategy: AllocationStrategy
47+
private let allocationStrategy: AllocationStrategy
4948

5049
private init(allocationStrategy: AllocationStrategy) {
5150
self.allocationStrategy = allocationStrategy
@@ -58,5 +57,6 @@ public struct AllocationStrategySettings {
5857
}
5958
}
6059

60+
/// Singletons will run on the cluster leader. *All* nodes are potential candidates.
6161
public static let byLeadership: AllocationStrategySettings = .init(allocationStrategy: .byLeadership)
6262
}

0 commit comments

Comments
 (0)