diff --git a/src/Orleans.Core.Abstractions/Diagnostics/ActivitySources.cs b/src/Orleans.Core.Abstractions/Diagnostics/ActivitySources.cs new file mode 100644 index 00000000000..25557ec2b34 --- /dev/null +++ b/src/Orleans.Core.Abstractions/Diagnostics/ActivitySources.cs @@ -0,0 +1,32 @@ +using System.Diagnostics; + +namespace Orleans.Diagnostics; + +public static class ActivitySources +{ + /// + /// Spans triggered from application level code + /// + public const string ApplicationGrainActivitySourceName = "Microsoft.Orleans.Application"; + /// + /// Spans triggered from Orleans runtime code + /// + public const string RuntimeActivitySourceName = "Microsoft.Orleans.Runtime"; + /// + /// Spans tied to lifecycle operations such as activation, migration, and deactivation. + /// + public const string LifecycleActivitySourceName = "Microsoft.Orleans.Lifecycle"; + /// + /// Spans tied to persistent storage operations. + /// + public const string StorageActivitySourceName = "Microsoft.Orleans.Storage"; + /// + /// A wildcard name to match all Orleans activity sources. + /// + public const string AllActivitySourceName = "Microsoft.Orleans.*"; + + internal static readonly ActivitySource ApplicationGrainSource = new(ApplicationGrainActivitySourceName, "1.1.0"); + internal static readonly ActivitySource RuntimeGrainSource = new(RuntimeActivitySourceName, "2.0.0"); + internal static readonly ActivitySource LifecycleGrainSource = new(LifecycleActivitySourceName, "1.0.0"); + internal static readonly ActivitySource StorageGrainSource = new(StorageActivitySourceName, "1.0.0"); +} diff --git a/src/Orleans.Core.Abstractions/Diagnostics/ActivityTagKeys.cs b/src/Orleans.Core.Abstractions/Diagnostics/ActivityTagKeys.cs new file mode 100644 index 00000000000..f5d125fa806 --- /dev/null +++ b/src/Orleans.Core.Abstractions/Diagnostics/ActivityTagKeys.cs @@ -0,0 +1,148 @@ +namespace Orleans.Diagnostics; + +/// +/// Contains constants for Activity tag keys used throughout Orleans. 
+/// +internal static class ActivityTagKeys +{ + /// + /// The request ID for an async enumerable operation. + /// + public const string AsyncEnumerableRequestId = "orleans.async_enumerable.request_id"; + + /// + /// The activation ID tag key. + /// + public const string ActivationId = "orleans.activation.id"; + + /// + /// The activation cause tag key (e.g., "new" or "rehydrate"). + /// + public const string ActivationCause = "orleans.activation.cause"; + + /// + /// The deactivation reason tag key. + /// + public const string DeactivationReason = "orleans.deactivation.reason"; + + /// + /// The grain ID tag key. + /// + public const string GrainId = "orleans.grain.id"; + + /// + /// The grain type tag key. + /// + public const string GrainType = "orleans.grain.type"; + + /// + /// The grain state tag key. + /// + public const string GrainState = "orleans.grain.state"; + + /// + /// The silo ID tag key. + /// + public const string SiloId = "orleans.silo.id"; + + /// + /// The tag key indicating whether a previous directory registration was present. + /// + public const string DirectoryPreviousRegistrationPresent = "orleans.directory.previousRegistration.present"; + + /// + /// The directory registered address tag key. + /// + public const string DirectoryRegisteredAddress = "orleans.directory.registered.address"; + + /// + /// The directory forwarding address tag key. + /// + public const string DirectoryForwardingAddress = "orleans.directory.forwarding.address"; + + /// + /// The exception type tag key. + /// + public const string ExceptionType = "exception.type"; + + /// + /// The exception message tag key. + /// + public const string ExceptionMessage = "exception.message"; + + /// + /// The placement filter type tag key. + /// + public const string PlacementFilterType = "orleans.placement.filter.type"; + + /// + /// The storage provider tag key. + /// + public const string StorageProvider = "orleans.storage.provider"; + + /// + /// The storage state name tag key. 
+ /// + public const string StorageStateName = "orleans.storage.state.name"; + + /// + /// The storage state type tag key. + /// + public const string StorageStateType = "orleans.storage.state.type"; + + /// + /// The RPC system tag key. + /// + public const string RpcSystem = "rpc.system"; + + /// + /// The RPC service tag key. + /// + public const string RpcService = "rpc.service"; + + /// + /// The RPC method tag key. + /// + public const string RpcMethod = "rpc.method"; + + /// + /// The RPC Orleans target ID tag key. + /// + public const string RpcOrleansTargetId = "rpc.orleans.target_id"; + + /// + /// The RPC Orleans source ID tag key. + /// + public const string RpcOrleansSourceId = "rpc.orleans.source_id"; + + /// + /// The exception stacktrace tag key. + /// + public const string ExceptionStacktrace = "exception.stacktrace"; + + /// + /// The exception escaped tag key. + /// + public const string ExceptionEscaped = "exception.escaped"; + + /// + /// Indicates whether a rehydration attempt was ignored. + /// + public const string RehydrateIgnored = "orleans.rehydrate.ignored"; + + /// + /// The reason why a rehydration attempt was ignored. + /// + public const string RehydrateIgnoredReason = "orleans.rehydrate.ignored.reason"; + + /// + /// The previous registration address during rehydration. + /// + public const string RehydratePreviousRegistration = "orleans.rehydrate.previousRegistration"; + + /// + /// The target silo address for migration. 
+ /// + public const string MigrationTargetSilo = "orleans.migration.target.silo"; +} + diff --git a/src/Orleans.Core.Abstractions/Diagnostics/OpenTelemetryHeaders.cs b/src/Orleans.Core.Abstractions/Diagnostics/OpenTelemetryHeaders.cs new file mode 100644 index 00000000000..c495584c2e3 --- /dev/null +++ b/src/Orleans.Core.Abstractions/Diagnostics/OpenTelemetryHeaders.cs @@ -0,0 +1,7 @@ +namespace Orleans.Diagnostics; + +internal static class OpenTelemetryHeaders +{ + internal const string TraceParent = "traceparent"; + internal const string TraceState = "tracestate"; +} diff --git a/src/Orleans.Core.Abstractions/Properties/AssemblyInfo.cs b/src/Orleans.Core.Abstractions/Properties/AssemblyInfo.cs index 2cdac083250..3700dce0a93 100644 --- a/src/Orleans.Core.Abstractions/Properties/AssemblyInfo.cs +++ b/src/Orleans.Core.Abstractions/Properties/AssemblyInfo.cs @@ -12,6 +12,7 @@ [assembly: InternalsVisibleTo("ServiceBus.Tests")] [assembly: InternalsVisibleTo("Tester.AzureUtils")] [assembly: InternalsVisibleTo("AWSUtils.Tests")] +[assembly: InternalsVisibleTo("Tester")] [assembly: InternalsVisibleTo("TesterInternal")] [assembly: InternalsVisibleTo("TestInternalGrainInterfaces")] [assembly: InternalsVisibleTo("TestInternalGrains")] diff --git a/src/Orleans.Core.Abstractions/Runtime/AsyncEnumerableRequest.cs b/src/Orleans.Core.Abstractions/Runtime/AsyncEnumerableRequest.cs index ded1c094bd8..f5aa55d94f3 100644 --- a/src/Orleans.Core.Abstractions/Runtime/AsyncEnumerableRequest.cs +++ b/src/Orleans.Core.Abstractions/Runtime/AsyncEnumerableRequest.cs @@ -9,6 +9,7 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Orleans.Concurrency; +using Orleans.Diagnostics; using Orleans.Invocation; using Orleans.Serialization.Invocation; @@ -199,6 +200,7 @@ internal sealed class AsyncEnumeratorProxy : IAsyncEnumerator private int _batchIndex; private bool _disposed; private bool _isInitialized; + private Activity? 
_sessionActivity; private bool IsBatch => (_current.State & EnumerationResult.Batch) != 0; private bool IsElement => (_current.State & EnumerationResult.Element) != 0; @@ -270,6 +272,9 @@ public async ValueTask DisposeAsync() if (_isInitialized) { + // Make the session activity current so that the DisposeAsync RPC is parented to it; when no session activity was started, keep the ambient activity instead of clearing it + var previousActivity = Activity.Current; + Activity.Current = _sessionActivity ?? previousActivity; try { await _target.DisposeAsync(_requestId); @@ -279,9 +284,18 @@ public async ValueTask DisposeAsync() var logger = ((GrainReference)_target).Shared.ServiceProvider.GetRequiredService>>(); logger.LogWarning(exception, "Failed to dispose async enumerator."); } + finally + { + Activity.Current = previousActivity; + } } _cancellationTokenSource?.Dispose(); + + // Stop the session activity after DisposeAsync completes + _sessionActivity?.Stop(); + _sessionActivity?.Dispose(); + _disposed = true; } @@ -302,6 +316,14 @@ public async ValueTask MoveNextAsync() } var isActive = _isInitialized; + + // Restore the session activity as the current activity so that RPC calls are parented to it + var previousActivity = Activity.Current; + if (_sessionActivity is not null) + { + Activity.Current = _sessionActivity; + } + try { (EnumerationResult Status, object Value) result; @@ -311,6 +333,11 @@ public async ValueTask MoveNextAsync() if (!_isInitialized) { + // Start the session activity on first enumeration call + // This span wraps the entire enumeration session + _sessionActivity = ActivitySources.ApplicationGrainSource.StartActivity(_request.GetActivityName(), ActivityKind.Client); + _sessionActivity?.SetTag(ActivityTagKeys.AsyncEnumerableRequestId, _requestId.ToString()); + // Assume the enumerator is active as soon as the call begins. 
isActive = true; result = await _target.StartEnumeration(_requestId, _request, _cancellationToken); @@ -324,10 +351,12 @@ public async ValueTask MoveNextAsync() isActive = result.Status.IsActive(); if (result.Status is EnumerationResult.Error) { + _sessionActivity?.SetStatus(ActivityStatusCode.Error); ExceptionDispatchInfo.Capture((Exception)result.Value).Throw(); } else if (result.Status is EnumerationResult.Canceled) { + _sessionActivity?.SetStatus(ActivityStatusCode.Error, "Canceled"); throw new OperationCanceledException(); } @@ -339,6 +368,7 @@ public async ValueTask MoveNextAsync() if (result.Status is EnumerationResult.MissingEnumeratorError) { + _sessionActivity?.SetStatus(ActivityStatusCode.Error, "MissingEnumerator"); throw new EnumerationAbortedException("Enumeration aborted: the remote target does not have a record of this enumerator." + " This likely indicates that the remote grain was deactivated since enumeration begun or that the enumerator was idle for longer than the expiration period."); } @@ -356,6 +386,11 @@ await _target.DisposeAsync(_requestId).AsTask() .ConfigureAwait(ConfigureAwaitOptions.ContinueOnCapturedContext | ConfigureAwaitOptions.SuppressThrowing); throw; } + finally + { + // Restore the previous activity after each call + Activity.Current = previousActivity; + } } } diff --git a/src/Orleans.Core/Diagnostics/ActivityNames.cs b/src/Orleans.Core/Diagnostics/ActivityNames.cs new file mode 100644 index 00000000000..d99aef9f4a3 --- /dev/null +++ b/src/Orleans.Core/Diagnostics/ActivityNames.cs @@ -0,0 +1,18 @@ +namespace Orleans.Runtime; + +public static class ActivityNames +{ + public const string PlaceGrain = "place grain"; + public const string FilterPlacementCandidates = "filter placement candidates"; + public const string ActivateGrain = "activate grain"; + public const string DeactivateGrain = "deactivate grain"; + public const string OnActivate = "execute OnActivateAsync"; + public const string OnDeactivate = "execute 
OnDeactivateAsync"; + public const string RegisterDirectoryEntry = "register directory entry"; + public const string StorageRead = "read storage"; + public const string StorageWrite = "write storage"; + public const string StorageClear = "clear storage"; + public const string ActivationDehydrate = "dehydrate activation"; + public const string ActivationRehydrate = "rehydrate activation"; + public const string WaitMigration = "wait migration"; +} diff --git a/src/Orleans.Core/Diagnostics/ActivityPropagationGrainCallFilter.cs b/src/Orleans.Core/Diagnostics/ActivityPropagationGrainCallFilter.cs index 0b1534c110d..99732e73505 100644 --- a/src/Orleans.Core/Diagnostics/ActivityPropagationGrainCallFilter.cs +++ b/src/Orleans.Core/Diagnostics/ActivityPropagationGrainCallFilter.cs @@ -1,7 +1,5 @@ -using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Threading.Tasks; +using Orleans.Diagnostics; namespace Orleans.Runtime { @@ -10,21 +8,36 @@ namespace Orleans.Runtime /// internal abstract class ActivityPropagationGrainCallFilter { - protected const string TraceParentHeaderName = "traceparent"; - protected const string TraceStateHeaderName = "tracestate"; - internal const string RpcSystem = "orleans"; internal const string OrleansNamespacePrefix = "Orleans"; - internal const string ApplicationGrainActivitySourceName = "Microsoft.Orleans.Application"; - internal const string RuntimeActivitySourceName = "Microsoft.Orleans.Runtime"; - protected static readonly ActivitySource ApplicationGrainSource = new(ApplicationGrainActivitySourceName, "1.0.0"); - protected static readonly ActivitySource RuntimeGrainSource = new(RuntimeActivitySourceName, "1.0.0"); + protected static ActivitySource GetActivitySource(IGrainCallContext context) + { + var interfaceType = context.Request.GetInterfaceType(); + var interfaceTypeName = interfaceType.Name; - protected static ActivitySource GetActivitySource(IGrainCallContext context) => - 
context.Request.GetInterfaceType().Namespace?.StartsWith(OrleansNamespacePrefix) == true - ? RuntimeGrainSource - : ApplicationGrainSource; + switch (interfaceTypeName) + { + // Memory storage uses grains for its implementation + case "IMemoryStorageGrain": + return ActivitySources.StorageGrainSource; + + // This extension is for explicit migrate/deactivate calls + case "IGrainManagementExtension": + // This target is for accepting migration batches + case "IActivationMigrationManagerSystemTarget": + return ActivitySources.LifecycleGrainSource; + + // These extensions are for async stream subscriptions + case "IAsyncEnumerableGrainExtension": + return ActivitySources.ApplicationGrainSource; + + default: + return interfaceType.Namespace?.StartsWith(OrleansNamespacePrefix, StringComparison.Ordinal) == true + ? ActivitySources.RuntimeGrainSource + : ActivitySources.ApplicationGrainSource; + } + } protected static void GetRequestContextValue(object carrier, string fieldName, out string fieldValue, out IEnumerable fieldValues) { @@ -37,17 +50,17 @@ protected static async Task Process(IGrainCallContext context, Activity activity if (activity is not null) { // rpc attributes from https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/rpc.md - activity.SetTag("rpc.system", RpcSystem); - activity.SetTag("rpc.service", context.InterfaceName); - activity.SetTag("rpc.method", context.MethodName); + activity.SetTag(ActivityTagKeys.RpcSystem, RpcSystem); + activity.SetTag(ActivityTagKeys.RpcService, context.InterfaceName); + activity.SetTag(ActivityTagKeys.RpcMethod, context.MethodName); if (activity.IsAllDataRequested) { // Custom attributes - activity.SetTag("rpc.orleans.target_id", context.TargetId.ToString()); + activity.SetTag(ActivityTagKeys.RpcOrleansTargetId, context.TargetId.ToString()); if (context.SourceId is GrainId sourceId) { - activity.SetTag("rpc.orleans.source_id", sourceId.ToString()); + 
activity.SetTag(ActivityTagKeys.RpcOrleansSourceId, sourceId.ToString()); } } } @@ -63,14 +76,14 @@ protected static async Task Process(IGrainCallContext context, Activity activity activity.SetStatus(ActivityStatusCode.Error); // exception attributes from https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/exceptions.md - activity.SetTag("exception.type", e.GetType().FullName); - activity.SetTag("exception.message", e.Message); + activity.SetTag(ActivityTagKeys.ExceptionType, e.GetType().FullName); + activity.SetTag(ActivityTagKeys.ExceptionMessage, e.Message); // Note that "exception.stacktrace" is the full exception detail, not just the StackTrace property. // See https://opentelemetry.io/docs/specs/semconv/attributes-registry/exception/ // and https://github.com/open-telemetry/opentelemetry-specification/pull/697#discussion_r453662519 - activity.SetTag("exception.stacktrace", e.ToString()); - activity.SetTag("exception.escaped", true); + activity.SetTag(ActivityTagKeys.ExceptionStacktrace, e.ToString()); + activity.SetTag(ActivityTagKeys.ExceptionEscaped, true); } throw; diff --git a/src/Orleans.Core/Runtime/RequestContextExtensions.cs b/src/Orleans.Core/Runtime/RequestContextExtensions.cs index be7f046e4f6..efb75428224 100644 --- a/src/Orleans.Core/Runtime/RequestContextExtensions.cs +++ b/src/Orleans.Core/Runtime/RequestContextExtensions.cs @@ -1,7 +1,6 @@ #nullable enable -using System; -using System.Collections.Generic; -using System.Linq; +using System.Diagnostics; +using Orleans.Diagnostics; using Orleans.Serialization; namespace Orleans.Runtime @@ -53,5 +52,36 @@ internal static Guid GetReentrancyId(Dictionary? contextData) _ = contextData.TryGetValue(RequestContext.CALL_CHAIN_REENTRANCY_HEADER, out var reentrancyId); return reentrancyId is Guid guid ? guid : Guid.Empty; } + + /// + /// Extracts an ActivityContext from request context data if present. + /// + internal static ActivityContext? 
TryGetActivityContext(this Dictionary? requestContextData) + { + if (requestContextData is not { Count: > 0 }) + { + return null; + } + + string? traceParent = null; + string? traceState = null; + + if (requestContextData.TryGetValue(OpenTelemetryHeaders.TraceParent, out var traceParentObj) && traceParentObj is string tp) + { + traceParent = tp; + } + + if (requestContextData.TryGetValue(OpenTelemetryHeaders.TraceState, out var traceStateObj) && traceStateObj is string ts) + { + traceState = ts; + } + + if (!string.IsNullOrEmpty(traceParent) && ActivityContext.TryParse(traceParent, traceState, isRemote: true, out var parentContext)) + { + return parentContext; + } + + return null; + } } } diff --git a/src/Orleans.Runtime/Catalog/ActivationData.cs b/src/Orleans.Runtime/Catalog/ActivationData.cs index c5b999bad5a..4ffcb1e8c3e 100644 --- a/src/Orleans.Runtime/Catalog/ActivationData.cs +++ b/src/Orleans.Runtime/Catalog/ActivationData.cs @@ -1,17 +1,14 @@ #nullable enable -using System; using System.Buffers; -using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Threading; -using System.Threading.Tasks; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Orleans.Configuration; using Orleans.Core.Internal; +using Orleans.Diagnostics; using Orleans.GrainDirectory; using Orleans.Internal; using Orleans.Runtime.Placement; @@ -25,6 +22,7 @@ namespace Orleans.Runtime; /// /// Maintains additional per-activation state that is required for Orleans internal operations. /// MUST lock this object for any concurrent access +/// MUST lock on `this` object because there are locks taken on ActivationData instances in various places in the codebase such as ActivationCollector.ScheduleCollection. /// Consider: compartmentalize by usage, e.g., using separate interfaces for data for catalog, etc. 
/// [DebuggerDisplay("GrainId = {GrainId}, State = {State}, Waiting = {WaitingCount}, Executing = {IsCurrentlyExecuting}")] @@ -66,6 +64,24 @@ internal sealed partial class ActivationData : private Task? _messageLoopTask; #pragma warning restore IDE0052 // Remove unread private members + private Activity? _activationActivity; + + /// + /// Constants for activity error event names used during activation lifecycle. + /// + private static class ActivityErrorEvents + { + public const string InstanceCreateFailed = "instance-create-failed"; + public const string DirectoryRegisterFailed = "directory-register-failed"; + public const string ActivationCancelled = "activation-cancelled"; + public const string ActivationFailed = "activation-failed"; + public const string ActivationError = "activation-error"; + public const string OnActivateFailed = "on-activate-failed"; + public const string OnDeactivateFailed = "on-deactivate-failed"; + public const string RehydrateError = "rehydrate-error"; + public const string DehydrateError = "dehydrate-error"; + } + public ActivationData( GrainAddress grainAddress, Func createWorkItemGroup, @@ -84,19 +100,37 @@ public ActivationData( Debug.Assert(_workItemGroup != null, "_workItemGroup must not be null."); } + internal void SetActivationActivity(Activity activity) + { + _activationActivity = activity; + } + + /// + /// Gets the activity context for the activation activity, if available. + /// This allows child activities to be properly parented during activation lifecycle operations. + /// + internal ActivityContext? 
GetActivationActivityContext() + { + return _activationActivity?.Context; + } + public void Start(IGrainActivator grainActivator) { Debug.Assert(Equals(ActivationTaskScheduler, TaskScheduler.Current)); + // locking on `this` is intentional as there are other places in the codebase taking locks on ActivationData instances lock (this) { try { var instance = grainActivator.CreateInstance(this); SetGrainInstance(instance); + _activationActivity?.AddEvent(new ActivityEvent("instance-created")); } catch (Exception exception) { - Deactivate(new(DeactivationReasonCode.ActivationFailed, exception, "Error constructing grain instance."), CancellationToken.None); + SetActivityError(_activationActivity, exception, ActivityErrorEvents.InstanceCreateFailed); + + Deactivate(new(DeactivationReasonCode.ActivationFailed, exception, "Error constructing grain instance."), _activationActivity?.Context, CancellationToken.None); } _messageLoopTask = RunMessageLoop(); @@ -544,39 +578,74 @@ public void Migrate(Dictionary? requestContext, CancellationToke } } - public void Deactivate(DeactivationReason reason, CancellationToken cancellationToken = default) + public void Deactivate(DeactivationReason reason, ActivityContext? activityContext, CancellationToken cancellationToken = default) { + var currentActivity = Activity.Current; + var deactivateActivity = activityContext is { } parent + ? 
ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.DeactivateGrain, ActivityKind.Internal, parentContext:parent) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.DeactivateGrain); + lock (this) { - var state = State; - if (state is ActivationState.Invalid) + try { - return; - } + var state = State; + if (deactivateActivity is { IsAllDataRequested: true }) + { + deactivateActivity.SetTag(ActivityTagKeys.GrainState, state); + } - if (DeactivationReason.ReasonCode == DeactivationReasonCode.None) - { - DeactivationReason = reason; - } + if (state is ActivationState.Invalid) + { + deactivateActivity?.Stop(); + return; + } + + if (DeactivationReason.ReasonCode == DeactivationReasonCode.None) + { + DeactivationReason = reason; + } + + if (deactivateActivity is { IsAllDataRequested: true }) + { + deactivateActivity.SetTag(ActivityTagKeys.DeactivationReason, DeactivationReason); + } + + if (!DeactivationStartTime.HasValue) + { + DeactivationStartTime = GrainRuntime.TimeProvider.GetUtcNow().UtcDateTime; + } + + if (state is ActivationState.Creating or ActivationState.Activating or ActivationState.Valid) + { + CancelPendingOperations(); - if (!DeactivationStartTime.HasValue) + _shared.InternalRuntime.ActivationWorkingSet.OnDeactivating(this); + SetState(ActivationState.Deactivating); + var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(_shared.InternalRuntime.CollectionOptions.Value.DeactivationTimeout); + ScheduleOperation(new Command.Deactivate(cts, state, deactivateActivity)); + } + else + { + deactivateActivity?.Stop(); + } + } + catch (Exception ex) { - DeactivationStartTime = GrainRuntime.TimeProvider.GetUtcNow().UtcDateTime; + SetActivityError(deactivateActivity, ex, "Error deactivating grain"); + deactivateActivity?.Stop(); + throw; } - - if (state is ActivationState.Creating or ActivationState.Activating or ActivationState.Valid) + finally { - CancelPendingOperations(); - - 
_shared.InternalRuntime.ActivationWorkingSet.OnDeactivating(this); - SetState(ActivationState.Deactivating); - var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); - cts.CancelAfter(_shared.InternalRuntime.CollectionOptions.Value.DeactivationTimeout); - ScheduleOperation(new Command.Deactivate(cts, state)); + Activity.Current = currentActivity; } } } + public void Deactivate(DeactivationReason reason, CancellationToken cancellationToken = default) => Deactivate(reason, Activity.Current?.Context, cancellationToken); + private void DeactivateStuckActivation() { IsStuckProcessingMessage = true; @@ -988,7 +1057,9 @@ void ProcessPendingRequests() DeactivationReasonCode.IncompatibleRequest, $"Received incompatible request for interface {message.InterfaceType} version {message.InterfaceVersion}. This activation supports interface version {currentVersion}."); - Deactivate(reason, cancellationToken: default); + var activityContext = message.RequestContextData.TryGetActivityContext(); + + Deactivate(reason, activityContext, cancellationToken: default); return; } } @@ -1156,7 +1227,7 @@ async Task ProcessOperationsAsync() await ActivateAsync(command.RequestContext, command.CancellationToken).SuppressThrowing(); break; case Command.Deactivate command: - await FinishDeactivating(command.PreviousState, command.CancellationToken).SuppressThrowing(); + await FinishDeactivating(command, command.CancellationToken).SuppressThrowing(); break; case Command.Delay command: await Task.Delay(command.Duration, GrainRuntime.TimeProvider, command.CancellationToken).SuppressThrowing(); @@ -1171,7 +1242,10 @@ async Task ProcessOperationsAsync() } finally { - await DisposeAsync(op); + if (op is not null) + { + await DisposeAsync(op); + } } } } @@ -1179,22 +1253,44 @@ async Task ProcessOperationsAsync() private void RehydrateInternal(IRehydrationContext context) { + Activity? 
rehydrateSpan = null; try { LogRehydratingGrain(_shared.Logger, this); + var grainMigrationParticipant = GrainInstance as IGrainMigrationParticipant; + + if (grainMigrationParticipant is not null) + { + // Start a span for rehydration + rehydrateSpan = _activationActivity is not null + ? ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.ActivationRehydrate, + ActivityKind.Internal, _activationActivity.Context) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.ActivationRehydrate, + ActivityKind.Internal); + rehydrateSpan?.SetTag(ActivityTagKeys.GrainId, GrainId.ToString()); + rehydrateSpan?.SetTag(ActivityTagKeys.GrainType, _shared.GrainTypeName); + rehydrateSpan?.SetTag(ActivityTagKeys.SiloId, _shared.Runtime.SiloAddress.ToString()); + rehydrateSpan?.SetTag(ActivityTagKeys.ActivationId, ActivationId.ToString()); + } + lock (this) { if (State != ActivationState.Creating) { LogIgnoringRehydrateAttempt(_shared.Logger, this, State); + rehydrateSpan?.SetTag(ActivityTagKeys.RehydrateIgnored, true); + rehydrateSpan?.SetTag(ActivityTagKeys.RehydrateIgnoredReason, $"State is {State}"); return; } - if (context.TryGetValue(GrainAddressMigrationContextKey, out GrainAddress? previousRegistration) && previousRegistration is not null) + if (context.TryGetValue(GrainAddressMigrationContextKey, out GrainAddress? 
previousRegistration) && + previousRegistration is not null) { PreviousRegistration = previousRegistration; LogPreviousActivationAddress(_shared.Logger, previousRegistration); + rehydrateSpan?.SetTag(ActivityTagKeys.RehydratePreviousRegistration, + previousRegistration.ToFullString()); } if (_lifecycle is { } lifecycle) @@ -1205,14 +1301,20 @@ private void RehydrateInternal(IRehydrationContext context) } } - (GrainInstance as IGrainMigrationParticipant)?.OnRehydrate(context); + grainMigrationParticipant?.OnRehydrate(context); } LogRehydratedGrain(_shared.Logger); + rehydrateSpan?.AddEvent(new ActivityEvent("rehydrated")); } catch (Exception exception) { LogErrorRehydratingActivation(_shared.Logger, exception); + SetActivityError(rehydrateSpan, exception, ActivityErrorEvents.RehydrateError); + } + finally + { + rehydrateSpan?.Dispose(); } } @@ -1228,11 +1330,38 @@ private void OnDehydrate(IDehydrationContext context) { context.TryAddValue(GrainAddressMigrationContextKey, Address); } - + + Activity? dehydrateSpan = null; try { + // Get the parent activity context from the dehydration context holder (captured when migration was initiated) + var parentContext = DehydrationContext?.MigrationActivityContext; + + var grainMigrationParticipant = GrainInstance as IGrainMigrationParticipant; + + if (grainMigrationParticipant is not null) + { + // Start a span for dehydration, parented to the migration request that triggered it + dehydrateSpan = parentContext.HasValue + ? 
ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.ActivationDehydrate, + ActivityKind.Internal, parentContext.Value) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.ActivationDehydrate, + ActivityKind.Internal); + if (dehydrateSpan is { IsAllDataRequested: true }) + { + dehydrateSpan.SetTag(ActivityTagKeys.GrainId, GrainId.ToString()); + dehydrateSpan.SetTag(ActivityTagKeys.GrainType, _shared.GrainTypeName); + dehydrateSpan.SetTag(ActivityTagKeys.SiloId, _shared.Runtime.SiloAddress.ToString()); + dehydrateSpan.SetTag(ActivityTagKeys.ActivationId, ActivationId.ToString()); + if (ForwardingAddress is { } fwd) + { + dehydrateSpan.SetTag(ActivityTagKeys.MigrationTargetSilo, fwd.ToString()); + } + } + } + // Note that these calls are in reverse order from Rehydrate, not for any particular reason other than symmetry. - (GrainInstance as IGrainMigrationParticipant)?.OnDehydrate(context); + grainMigrationParticipant?.OnDehydrate(context); if (_lifecycle is { } lifecycle) { @@ -1245,12 +1374,18 @@ private void OnDehydrate(IDehydrationContext context) catch (Exception exception) { LogErrorDehydratingActivation(_shared.Logger, exception); + SetActivityError(dehydrateSpan, exception, ActivityErrorEvents.DehydrateError); + } + finally + { + dehydrateSpan?.Dispose(); } } LogDehydratedActivation(_shared.Logger); } + /// /// Handle an incoming message and queue/invoke appropriate handler /// @@ -1469,83 +1604,110 @@ private async Task ActivateAsync(Dictionary? requestContextData, return; } - // A chain of promises that will have to complete in order to complete the activation - // Register with the grain directory and call the Activate method on the new activation. + _activationActivity?.AddEvent(new ActivityEvent("activation-start")); try { - // Currently, the only grain type that is not registered in the Grain Directory is StatelessWorker. - // Among those that are registered in the directory, we currently do not have any multi activations. 
if (IsUsingGrainDirectory) { - Exception? registrationException; - var previousRegistration = PreviousRegistration; bool success; - try + Exception? registrationException; + + // Start directory registration activity as a child of the activation activity + using (var registerSpan = _activationActivity is not null + ? ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.RegisterDirectoryEntry, ActivityKind.Internal, _activationActivity.Context) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.RegisterDirectoryEntry, ActivityKind.Internal)) { - while (true) + registerSpan?.SetTag(ActivityTagKeys.GrainId, GrainId.ToString()); + registerSpan?.SetTag(ActivityTagKeys.SiloId, _shared.Runtime.SiloAddress.ToString()); + registerSpan?.SetTag(ActivityTagKeys.ActivationId, ActivationId.ToString()); + registerSpan?.SetTag(ActivityTagKeys.DirectoryPreviousRegistrationPresent, + PreviousRegistration is not null); + var previousRegistration = PreviousRegistration; + + try { - LogRegisteringGrain(_shared.Logger, this, previousRegistration); - - var result = await _shared.InternalRuntime.GrainLocator.Register(Address, previousRegistration).WaitAsync(cancellationToken); - if (Address.Matches(result)) - { - Address = result; - success = true; - } - else if (result?.SiloAddress is { } registeredSilo && registeredSilo.Equals(Address.SiloAddress)) - { - // Attempt to register this activation again, using the registration of the previous instance of this grain, - // which is registered to this silo. That activation must be a defunct predecessor of this activation, - // since the catalog only allows one activation of a given grain at a time. - // This could occur if the previous activation failed to unregister itself from the grain directory. 
- previousRegistration = result; - LogAttemptToRegisterWithPreviousActivation(_shared.Logger, GrainId, result); - continue; - } - else + while (true) { - // Set the forwarding address so that messages enqueued on this activation can be forwarded to - // the existing activation. - ForwardingAddress = result?.SiloAddress; - if (ForwardingAddress is { } address) + LogRegisteringGrain(_shared.Logger, this, previousRegistration); + + var result = await _shared.InternalRuntime.GrainLocator + .Register(Address, previousRegistration).WaitAsync(cancellationToken); + if (Address.Matches(result)) + { + Address = result; + success = true; + _activationActivity?.AddEvent(new ActivityEvent("directory-register-success")); + registerSpan?.AddEvent(new ActivityEvent("success")); + registerSpan?.SetTag(ActivityTagKeys.DirectoryRegisteredAddress, result.ToFullString()); + } + else if (result?.SiloAddress is { } registeredSilo && + registeredSilo.Equals(Address.SiloAddress)) + { + previousRegistration = result; + LogAttemptToRegisterWithPreviousActivation(_shared.Logger, GrainId, result); + _activationActivity?.AddEvent(new ActivityEvent("directory-register-retry-previous")); + registerSpan?.AddEvent(new ActivityEvent("retry-previous")); + continue; + } + else { - DeactivationReason = new(DeactivationReasonCode.DuplicateActivation, $"This grain is active on another host ({address})."); + ForwardingAddress = result?.SiloAddress; + if (ForwardingAddress is { } address) + { + DeactivationReason = new(DeactivationReasonCode.DuplicateActivation, + $"This grain is active on another host ({address})."); + } + + success = false; + CatalogInstruments.ActivationConcurrentRegistrationAttempts.Add(1); + LogDuplicateActivation( + _shared.Logger, + Address, + ForwardingAddress, + GrainInstance?.GetType(), + new(Address), + WaitingCount); + _activationActivity?.AddEvent(new ActivityEvent("duplicate-activation")); + registerSpan?.AddEvent(new ActivityEvent("duplicate")); + if (ForwardingAddress is { 
} fwd) + { + registerSpan?.SetTag(ActivityTagKeys.DirectoryForwardingAddress, fwd.ToString()); + } } - success = false; - CatalogInstruments.ActivationConcurrentRegistrationAttempts.Add(1); - // If this was a duplicate, it's not an error, just a race. - // Forward on all of the pending messages, and then forget about this activation. - LogDuplicateActivation( - _shared.Logger, - Address, - ForwardingAddress, - GrainInstance?.GetType(), - new(Address), - WaitingCount); + break; } - break; + registrationException = null; } - - registrationException = null; - } - catch (Exception exception) - { - registrationException = exception; - if (!cancellationToken.IsCancellationRequested) + catch (Exception exception) { - LogFailedToRegisterGrain(_shared.Logger, registrationException, this); + registrationException = exception; + if (!cancellationToken.IsCancellationRequested) + { + LogFailedToRegisterGrain(_shared.Logger, registrationException, this); + } + + success = false; + _activationActivity?.AddEvent(new ActivityEvent("directory-register-failed")); + SetActivityError(registerSpan, exception, ActivityErrorEvents.DirectoryRegisterFailed); } - success = false; } - if (!success) { Deactivate(new(DeactivationReasonCode.DirectoryFailure, registrationException, "Failed to register activation in grain directory.")); // Activation failed. + if (registrationException is not null) + { + SetActivityError(_activationActivity, registrationException, ActivityErrorEvents.ActivationCancelled); + } + else + { + SetActivityError(_activationActivity, ActivityErrorEvents.ActivationCancelled); + } + return; } } @@ -1554,10 +1716,10 @@ private async Task ActivateAsync(Dictionary? 
requestContextData, { SetState(ActivationState.Activating); } + _activationActivity?.AddEvent(new ActivityEvent("state-activating")); LogActivatingGrain(_shared.Logger, this); - // Start grain lifecycle within try-catch wrapper to safely capture any exceptions thrown from called function try { RequestContextExtensions.Import(requestContextData); @@ -1565,17 +1727,32 @@ private async Task ActivateAsync(Dictionary? requestContextData, { if (_lifecycle is { } lifecycle) { + _activationActivity?.AddEvent(new ActivityEvent("lifecycle-start")); await lifecycle.OnStart(cancellationToken).WaitAsync(cancellationToken); + _activationActivity?.AddEvent(new ActivityEvent("lifecycle-started")); } } catch (Exception exception) { LogErrorStartingLifecycle(_shared.Logger, exception, this); + _activationActivity?.AddEvent(new ActivityEvent("lifecycle-start-failed")); throw; } if (GrainInstance is IGrainBase grainBase) { + // Start a span for OnActivateAsync execution + using var onActivateSpan = _activationActivity is not null + ? ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.OnActivate, ActivityKind.Internal, _activationActivity.Context) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.OnActivate, ActivityKind.Internal); + if (onActivateSpan is { IsAllDataRequested: true }) + { + onActivateSpan.SetTag(ActivityTagKeys.GrainId, GrainId.ToString()); + onActivateSpan.SetTag(ActivityTagKeys.GrainType, _shared.GrainTypeName ?? GrainInstance.GetType().FullName); + onActivateSpan.SetTag(ActivityTagKeys.SiloId, _shared.Runtime.SiloAddress.ToString()); + onActivateSpan.SetTag(ActivityTagKeys.ActivationId, ActivationId.ToString()); + } + try { await grainBase.OnActivateAsync(cancellationToken).WaitAsync(cancellationToken); @@ -1583,6 +1760,7 @@ private async Task ActivateAsync(Dictionary? 
requestContextData, catch (Exception exception) { LogErrorInGrainMethod(_shared.Logger, exception, nameof(IGrainBase.OnActivateAsync), this); + SetActivityError(onActivateSpan, exception, ActivityErrorEvents.OnActivateFailed); throw; } } @@ -1591,48 +1769,61 @@ private async Task ActivateAsync(Dictionary? requestContextData, { if (State is ActivationState.Activating) { - SetState(ActivationState.Valid); // Activate calls on this activation are finished + SetState(ActivationState.Valid); _shared.InternalRuntime.ActivationWorkingSet.OnActivated(this); } } + _activationActivity?.AddEvent(new ActivityEvent("state-valid")); + _activationActivity?.Stop(); LogFinishedActivatingGrain(_shared.Logger, this); } catch (Exception exception) { CatalogInstruments.ActivationFailedToActivate.Add(1); - - // Capture the exception so that it can be propagated to rejection messages var sourceException = (exception as OrleansLifecycleCanceledException)?.InnerException ?? exception; LogErrorActivatingGrain(_shared.Logger, sourceException, this); - - // Unregister this as a message target after some period of time. - // This is delayed so that consistently failing activation, perhaps due to an application bug or network - // issue, does not cause a flood of doomed activations. - // If the cancellation token was canceled, there is no need to wait an additional time, since the activation - // has already waited some significant amount of time. if (!cancellationToken.IsCancellationRequested) { ScheduleOperation(new Command.Delay(TimeSpan.FromSeconds(5))); } - - // Perform the required deactivation steps. - Deactivate(new(DeactivationReasonCode.ActivationFailed, sourceException, "Failed to activate grain.")); - - // Activation failed. 
+ Deactivate(new(DeactivationReasonCode.ActivationFailed, sourceException, "Failed to activate grain."), CancellationToken.None); + SetActivityError(_activationActivity, sourceException, ActivityErrorEvents.ActivationFailed); + _activationActivity?.Stop(); return; } } catch (Exception exception) { LogActivationFailed(_shared.Logger, exception, this); - Deactivate(new(DeactivationReasonCode.ApplicationError, exception, "Failed to activate grain.")); + Deactivate(new(DeactivationReasonCode.ApplicationError, exception, "Failed to activate grain."), CancellationToken.None); + SetActivityError(_activationActivity, exception, ActivityErrorEvents.ActivationError); + _activationActivity?.Stop(); } finally { _workSignal.Signal(); } } + + private void SetActivityError(Activity? erroredActivity, string? errorEventName) + { + if (erroredActivity is { } activity) + { + activity.SetStatus(ActivityStatusCode.Error, errorEventName); + } + } + + private void SetActivityError(Activity? erroredActivity, Exception exception, string? errorEventName) + { + if (erroredActivity is { } activity) + { + activity.SetStatus(ActivityStatusCode.Error, errorEventName); + activity.SetTag(ActivityTagKeys.ExceptionType, exception.GetType().FullName); + activity.SetTag(ActivityTagKeys.ExceptionMessage, exception.Message); + } + } + #endregion #region Deactivation @@ -1640,8 +1831,10 @@ private async Task ActivateAsync(Dictionary? requestContextData, /// /// Completes the deactivation process. /// - private async Task FinishDeactivating(ActivationState previousState, CancellationToken cancellationToken) + private async Task FinishDeactivating(Command.Deactivate deactivateCommand, CancellationToken cancellationToken) { + using var _ = deactivateCommand.Activity; + var migrating = false; var encounteredError = false; try @@ -1652,10 +1845,24 @@ private async Task FinishDeactivating(ActivationState previousState, Cancellatio DisposeTimers(); // If the grain was valid when deactivation started, call OnDeactivateAsync. 
- if (previousState == ActivationState.Valid) + if (deactivateCommand.PreviousState == ActivationState.Valid) { if (GrainInstance is IGrainBase grainBase) { + // Start a span for OnDeactivateAsync execution + + using var onDeactivateSpan = deactivateCommand.Activity is not null + ? ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.OnDeactivate, ActivityKind.Internal, parentContext:deactivateCommand.Activity.Context) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.OnDeactivate, ActivityKind.Internal); + if (onDeactivateSpan is { IsAllDataRequested: true }) + { + onDeactivateSpan.SetTag(ActivityTagKeys.GrainId, GrainId.ToString()); + onDeactivateSpan.SetTag(ActivityTagKeys.GrainType, _shared.GrainTypeName ?? GrainInstance.GetType().FullName); + onDeactivateSpan.SetTag(ActivityTagKeys.SiloId, _shared.Runtime.SiloAddress.ToString()); + onDeactivateSpan.SetTag(ActivityTagKeys.ActivationId, ActivationId.ToString()); + onDeactivateSpan.SetTag(ActivityTagKeys.DeactivationReason, DeactivationReason.ToString()); + } + try { LogBeforeOnDeactivateAsync(_shared.Logger, this); @@ -1667,6 +1874,7 @@ private async Task FinishDeactivating(ActivationState previousState, Cancellatio catch (Exception exception) { LogErrorInGrainMethod(_shared.Logger, exception, nameof(IGrainBase.OnDeactivateAsync), this); + SetActivityError(onDeactivateSpan, exception, ActivityErrorEvents.OnDeactivateFailed); // Swallow the exception and continue with deactivation. 
encounteredError = true; @@ -1727,6 +1935,7 @@ private async Task FinishDeactivating(ActivationState previousState, Cancellatio } catch (Exception ex) { + SetActivityError(deactivateCommand.Activity, ex, "Error in FinishDeactivating"); LogErrorDeactivating(_shared.Logger, ex, this); } @@ -1755,6 +1964,7 @@ private async Task FinishDeactivating(ActivationState previousState, Cancellatio } catch (Exception exception) { + SetActivityError(deactivateCommand.Activity, exception, "Error in FinishDeactivating"); LogExceptionDisposing(_shared.Logger, exception, this); } @@ -2117,9 +2327,10 @@ public virtual void Dispose() GC.SuppressFinalize(this); } - public sealed class Deactivate(CancellationTokenSource cts, ActivationState previousState) : Command(cts) + public sealed class Deactivate(CancellationTokenSource cts, ActivationState previousState, Activity? activity) : Command(cts) { public ActivationState PreviousState { get; } = previousState; + public Activity? Activity { get; } = activity; } public sealed class Activate(Dictionary? requestContext, CancellationTokenSource cts) : Command(cts) @@ -2179,6 +2390,12 @@ private class DehydrationContextHolder(SerializerSessionPool sessionPool, Dictio { public readonly MigrationContext MigrationContext = new(sessionPool); public readonly Dictionary? RequestContext = requestContext; + + /// + /// The activity context from the grain call that initiated the migration. + /// This is used to parent the dehydrate span to the migration request trace. + /// + public ActivityContext? 
MigrationActivityContext { get; set; } = Activity.Current?.Context; } [LoggerMessage( @@ -2361,11 +2578,6 @@ private readonly struct ActivationDataLogValue(ActivationData activation, bool i Message = "Error activating grain {Grain}")] private static partial void LogErrorActivatingGrain(ILogger logger, Exception exception, ActivationData grain); - [LoggerMessage( - Level = LogLevel.Error, - Message = "Activation of grain {Grain} failed")] - private static partial void LogActivationFailed(ILogger logger, Exception exception, ActivationData grain); - [LoggerMessage( Level = LogLevel.Trace, Message = "Completing deactivation of '{Activation}'")] @@ -2432,4 +2644,9 @@ private static partial void LogDuplicateActivation( Level = LogLevel.Debug, Message = "Rerouting {NumMessages} messages from invalid grain activation {Grain}")] private static partial void LogReroutingMessagesNoForwarding(ILogger logger, int numMessages, ActivationData grain); + + [LoggerMessage( + Level = LogLevel.Error, + Message = "Activation of grain {Grain} failed")] + private static partial void LogActivationFailed(ILogger logger, Exception exception, ActivationData grain); } diff --git a/src/Orleans.Runtime/Catalog/ActivationMigrationManager.cs b/src/Orleans.Runtime/Catalog/ActivationMigrationManager.cs index 0e040ebd70f..c7aae8ad5cd 100644 --- a/src/Orleans.Runtime/Catalog/ActivationMigrationManager.cs +++ b/src/Orleans.Runtime/Catalog/ActivationMigrationManager.cs @@ -9,6 +9,7 @@ using System.Threading.Tasks.Sources; using Microsoft.Extensions.Logging; using Microsoft.Extensions.ObjectPool; +using Orleans.Diagnostics; using Orleans.Internal; using Orleans.Runtime.Internal; using Orleans.Runtime.Scheduler; @@ -98,6 +99,7 @@ public ActivationMigrationManager( public async ValueTask AcceptMigratingGrains(List migratingGrains) { var activations = new List(); + var currentActivity = Activity.Current; foreach (var package in migratingGrains) { // If the activation does not exist, create it and provide 
it with the migration context while doing so. @@ -107,12 +109,15 @@ public async ValueTask AcceptMigratingGrains(List migrati { activations.Add(activation); } + + Activity.Current = currentActivity; } // Wait for all activations to become active or reach a terminal state. // This ensures that the activation has completed registration in the directory (or is abandoned) before we return. // Otherwise, there could be a race where the original silo removes the activation from its catalog, receives a new message for that activation, // and re-activates it before the new activation on this silo has been registered with the directory. + using var waitActivity = ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.WaitMigration); while (true) { var allActiveOrTerminal = true; diff --git a/src/Orleans.Runtime/Catalog/Catalog.cs b/src/Orleans.Runtime/Catalog/Catalog.cs index 2783b6f9e87..e031f50d99b 100644 --- a/src/Orleans.Runtime/Catalog/Catalog.cs +++ b/src/Orleans.Runtime/Catalog/Catalog.cs @@ -1,12 +1,10 @@ -using System; -using System.Collections.Generic; using System.Runtime.CompilerServices; -using System.Threading; -using System.Threading.Tasks; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Orleans.GrainDirectory; using Orleans.Runtime.GrainDirectory; +using System.Diagnostics; +using Orleans.Diagnostics; namespace Orleans.Runtime { @@ -168,6 +166,25 @@ public IGrainContext GetOrCreateActivation( return UnableToCreateActivation(this, grainId); } + // Start activation span with parent context from request if available + var parentContext = requestContextData.TryGetActivityContext(); + var activationActivity = parentContext.HasValue + ? 
ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.ActivateGrain, ActivityKind.Internal, parentContext.Value) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.ActivateGrain, ActivityKind.Internal); + if (activationActivity is not null) + { + activationActivity.SetTag(ActivityTagKeys.GrainId, grainId.ToString()); + activationActivity.SetTag(ActivityTagKeys.GrainType, grainId.Type.ToString()); + activationActivity.SetTag(ActivityTagKeys.SiloId, Silo.ToString()); + activationActivity.SetTag(ActivityTagKeys.ActivationCause, rehydrationContext is null ? "new" : "rehydrate"); + if (result is ActivationData act) + { + activationActivity.SetTag(ActivityTagKeys.ActivationId, act.ActivationId.ToString()); + act.SetActivationActivity(activationActivity); + activationActivity.AddEvent(new ActivityEvent("creating")); + } + } + CatalogInstruments.ActivationsCreated.Add(1); // Rehydration occurs before activation. @@ -430,6 +447,5 @@ private readonly struct SiloAddressLogValue(SiloAddress silo) Message = "Failed to unregister non-existent activation {Address}" )] private partial void LogFailedToUnregisterNonExistingActivation(GrainAddress address, Exception exception); - } } diff --git a/src/Orleans.Runtime/Core/InsideRuntimeClient.cs b/src/Orleans.Runtime/Core/InsideRuntimeClient.cs index c4786c3584c..d0ef329e206 100644 --- a/src/Orleans.Runtime/Core/InsideRuntimeClient.cs +++ b/src/Orleans.Runtime/Core/InsideRuntimeClient.cs @@ -10,6 +10,7 @@ using Microsoft.Extensions.Options; using Orleans.CodeGeneration; using Orleans.Configuration; +using Orleans.Diagnostics; using Orleans.GrainReferences; using Orleans.Metadata; using Orleans.Runtime.GrainDirectory; @@ -313,7 +314,15 @@ public async Task Invoke(IGrainContext target, Message message) ise.IsSourceActivation = false; LogDeactivatingInconsistentState(this.invokeExceptionLogger, target, invocationException); - target.Deactivate(new DeactivationReason(DeactivationReasonCode.ApplicationError, 
LogFormatter.PrintException(invocationException))); + + if (target is ActivationData ad && message.RequestContextData.TryGetActivityContext() is { } ac) + { + ad.Deactivate(new DeactivationReason(DeactivationReasonCode.ApplicationError, LogFormatter.PrintException(invocationException)), ac); + } + else + { + target.Deactivate(new DeactivationReason(DeactivationReasonCode.ApplicationError, LogFormatter.PrintException(invocationException))); + } } } diff --git a/src/Orleans.Runtime/Orleans.Runtime.csproj b/src/Orleans.Runtime/Orleans.Runtime.csproj index f9f503df5a3..1c15e73b3ae 100644 --- a/src/Orleans.Runtime/Orleans.Runtime.csproj +++ b/src/Orleans.Runtime/Orleans.Runtime.csproj @@ -16,4 +16,8 @@ + + + + diff --git a/src/Orleans.Runtime/Placement/PlacementService.cs b/src/Orleans.Runtime/Placement/PlacementService.cs index 92acac246a3..8576495b2c2 100644 --- a/src/Orleans.Runtime/Placement/PlacementService.cs +++ b/src/Orleans.Runtime/Placement/PlacementService.cs @@ -1,13 +1,10 @@ -using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Orleans.Configuration; +using Orleans.Diagnostics; using Orleans.Placement; using Orleans.Runtime.GrainDirectory; using Orleans.Runtime.Internal; @@ -120,11 +117,20 @@ public SiloAddress[] GetCompatibleSilos(PlacementTarget target) var filters = _filterStrategyResolver.GetPlacementFilterStrategies(grainType); if (filters.Length > 0) { + // Capture the parent activity context now so each filter span is parented to the + // current activity (e.g. PlaceGrain) rather than to sibling filter spans that may + // be active during deferred enumeration. 
+ var parentActivityContext = Activity.Current?.Context; + IEnumerable filteredSilos = compatibleSilos; foreach (var placementFilter in filters) { var director = _placementFilterDirectoryResolver.GetFilterDirector(placementFilter); - filteredSilos = director.Filter(placementFilter, target, filteredSilos); + filteredSilos = InstrumentFilteredSilos( + director.Filter(placementFilter, target, filteredSilos), + placementFilter, + grainType, + parentActivityContext); } compatibleSilos = filteredSilos.ToArray(); @@ -220,11 +226,12 @@ bool CachedAddressIsValidCore(Message message, GrainAddress cachedAddress, List< /// The request context, which will be available to the placement strategy. /// The placement strategy to use. /// A location for the new activation. - public Task PlaceGrainAsync(GrainId grainId, Dictionary requestContextData, PlacementStrategy placementStrategy) + public async Task PlaceGrainAsync(GrainId grainId, Dictionary requestContextData, PlacementStrategy placementStrategy) { + using var placeGrainActivity = TryRestoreActivityContext(requestContextData, ActivityNames.PlaceGrain); var target = new PlacementTarget(grainId, requestContextData, default, 0); var director = _directorResolver.GetPlacementDirector(placementStrategy); - return director.OnAddActivation(placementStrategy, target, this); + return await director.OnAddActivation(placementStrategy, target, this); } private class PlacementWorker @@ -367,32 +374,48 @@ private void AddressWaitingMessages(GrainPlacementWorkItem completedWorkItem) private async Task GetOrPlaceActivationAsync(Message firstMessage) { await Task.Yield(); - var target = new PlacementTarget( - firstMessage.TargetGrain, - firstMessage.RequestContextData, - firstMessage.InterfaceType, - firstMessage.InterfaceVersion); - - var targetGrain = target.GrainIdentity; - var result = await _placementService._grainLocator.Lookup(targetGrain); - if (result is not null) - { - return result.SiloAddress; - } - var strategy = 
_placementService._strategyResolver.GetPlacementStrategy(target.GrainIdentity.Type); - var director = _placementService._directorResolver.GetPlacementDirector(strategy); - var siloAddress = await director.OnAddActivation(strategy, target, _placementService); + // InnerGetOrPlaceActivationAsync may set a new activity as current from the RequestContextData, + // so we need to save and restore the current activity. + var currentActivity = Activity.Current; + var activationLocation = await InnerGetOrPlaceActivationAsync(); + Activity.Current = currentActivity; + + return activationLocation; - // Give the grain locator one last chance to tell us that the grain has already been placed - if (_placementService._grainLocator.TryLookupInCache(targetGrain, out result) && _placementService.CachedAddressIsValid(firstMessage, result)) + async Task InnerGetOrPlaceActivationAsync() { - return result.SiloAddress; - } + // Restore activity context from the message's request context data + // This ensures directory lookups are properly traced as children of the original request + using var restoredActivity = TryRestoreActivityContext(firstMessage.RequestContextData, ActivityNames.PlaceGrain); + + var target = new PlacementTarget( + firstMessage.TargetGrain, + firstMessage.RequestContextData, + firstMessage.InterfaceType, + firstMessage.InterfaceVersion); + + var targetGrain = target.GrainIdentity; + var result = await _placementService._grainLocator.Lookup(targetGrain); + if (result is not null) + { + return result.SiloAddress; + } + + var strategy = _placementService._strategyResolver.GetPlacementStrategy(target.GrainIdentity.Type); + var director = _placementService._directorResolver.GetPlacementDirector(strategy); + var siloAddress = await director.OnAddActivation(strategy, target, _placementService); - _placementService._grainLocator.InvalidateCache(targetGrain); - _placementService._grainLocator.UpdateCache(targetGrain, siloAddress); - return siloAddress; + // Give the grain 
locator one last chance to tell us that the grain has already been placed + if (_placementService._grainLocator.TryLookupInCache(targetGrain, out result) && _placementService.CachedAddressIsValid(firstMessage, result)) + { + return result.SiloAddress; + } + + _placementService._grainLocator.InvalidateCache(targetGrain); + _placementService._grainLocator.UpdateCache(targetGrain, siloAddress); + return siloAddress; + } } private class GrainPlacementWorkItem @@ -403,6 +426,50 @@ private class GrainPlacementWorkItem } } + /// + /// Wraps a filter's output enumerable so that an Activity span is created when the + /// sequence is actually enumerated, not when the filter is composed. This avoids + /// per-filter array materialization while still giving accurate span timings. + /// + private static IEnumerable InstrumentFilteredSilos( + IEnumerable silos, + PlacementFilterStrategy filter, + GrainType grainType, + ActivityContext? parentActivityContext) + { + using var filterSpan = parentActivityContext is { } parentContext + ? ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.FilterPlacementCandidates, ActivityKind.Internal, parentContext) + : ActivitySources.LifecycleGrainSource.StartActivity(ActivityNames.FilterPlacementCandidates); + filterSpan?.SetTag(ActivityTagKeys.PlacementFilterType, filter.GetType().Name); + filterSpan?.SetTag(ActivityTagKeys.GrainType, grainType.ToString()); + + foreach (var silo in silos) + { + yield return silo; + } + } + + /// + /// Attempts to restore the parent activity context from request context data. 
+ /// + private static Activity TryRestoreActivityContext(Dictionary requestContextData, string operationName) + { + if (requestContextData is null) + { + return null; + } + + var activityContext = requestContextData.TryGetActivityContext(); + + if (activityContext is {} parentContext) + { + // Start the activity from the Catalog's ActivitySource to properly associate it with activation tracing + return ActivitySources.LifecycleGrainSource.StartActivity(operationName, ActivityKind.Internal, parentContext); + } + + return null; + } + [LoggerMessage( Level = LogLevel.Debug, Message = "Found address {Address} for grain {GrainId} in cache for message {Message}" diff --git a/src/Orleans.Runtime/Storage/StateStorageBridge.cs b/src/Orleans.Runtime/Storage/StateStorageBridge.cs index 58ddf0bd96a..3b34115cb49 100644 --- a/src/Orleans.Runtime/Storage/StateStorageBridge.cs +++ b/src/Orleans.Runtime/Storage/StateStorageBridge.cs @@ -1,12 +1,11 @@ #nullable enable -using System; using System.Collections.Concurrent; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.ExceptionServices; -using System.Threading.Tasks; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; -using Orleans.Runtime; +using Orleans.Diagnostics; using Orleans.Serialization.Activators; using Orleans.Serialization.Serializers; using Orleans.Storage; @@ -83,6 +82,22 @@ public async Task ReadStateAsync() { GrainRuntime.CheckRuntimeContext(RuntimeContext.Current); + // Try to get the parent activity context from the current activity or from the activation's stored activity + var parentContext = Activity.Current?.Context; + if (parentContext is null && _grainContext is ActivationData activationData) + { + // If we're in activation context and there's an activation activity, use it as parent + parentContext = activationData.GetActivationActivityContext(); + } + + using var activity = parentContext.HasValue + ? 
ActivitySources.StorageGrainSource.StartActivity(ActivityNames.StorageRead, ActivityKind.Client, parentContext.Value) + : ActivitySources.StorageGrainSource.StartActivity(ActivityNames.StorageRead, ActivityKind.Client); + activity?.SetTag(ActivityTagKeys.GrainId, _grainContext.GrainId.ToString()); + activity?.SetTag(ActivityTagKeys.StorageProvider, _shared.ProviderTypeName); + activity?.SetTag(ActivityTagKeys.StorageStateName, _shared.Name); + activity?.SetTag(ActivityTagKeys.StorageStateType, _shared.StateTypeName); + var sw = ValueStopwatch.StartNew(); await _shared.Store.ReadStateAsync(_shared.Name, _grainContext.GrainId, GrainState); IsStateInitialized = true; @@ -102,6 +117,21 @@ public async Task WriteStateAsync() { GrainRuntime.CheckRuntimeContext(RuntimeContext.Current); + // Try to get the parent activity context from the current activity or from the activation's stored activity + var parentContext = Activity.Current?.Context; + if (parentContext is null && _grainContext is ActivationData activationData) + { + parentContext = activationData.GetActivationActivityContext(); + } + + using var activity = parentContext.HasValue + ? 
ActivitySources.StorageGrainSource.StartActivity(ActivityNames.StorageWrite, ActivityKind.Client, parentContext.Value) + : ActivitySources.StorageGrainSource.StartActivity(ActivityNames.StorageWrite, ActivityKind.Client); + activity?.SetTag(ActivityTagKeys.GrainId, _grainContext.GrainId.ToString()); + activity?.SetTag(ActivityTagKeys.StorageProvider, _shared.ProviderTypeName); + activity?.SetTag(ActivityTagKeys.StorageStateName, _shared.Name); + activity?.SetTag(ActivityTagKeys.StorageStateType, _shared.StateTypeName); + var sw = ValueStopwatch.StartNew(); await _shared.Store.WriteStateAsync(_shared.Name, _grainContext.GrainId, GrainState); StorageInstruments.OnStorageWrite(sw.Elapsed, _shared.ProviderTypeName, _shared.Name, _shared.StateTypeName); @@ -120,6 +150,21 @@ public async Task ClearStateAsync() { GrainRuntime.CheckRuntimeContext(RuntimeContext.Current); + // Try to get the parent activity context from the current activity or from the activation's stored activity + var parentContext = Activity.Current?.Context; + if (parentContext is null && _grainContext is ActivationData activationData) + { + parentContext = activationData.GetActivationActivityContext(); + } + + using var activity = parentContext.HasValue + ? 
ActivitySources.StorageGrainSource.StartActivity(ActivityNames.StorageClear, ActivityKind.Client, parentContext.Value) + : ActivitySources.StorageGrainSource.StartActivity(ActivityNames.StorageClear, ActivityKind.Client); + activity?.SetTag(ActivityTagKeys.GrainId, _grainContext.GrainId.ToString()); + activity?.SetTag(ActivityTagKeys.StorageProvider, _shared.ProviderTypeName); + activity?.SetTag(ActivityTagKeys.StorageStateName, _shared.Name); + activity?.SetTag(ActivityTagKeys.StorageStateType, _shared.StateTypeName); + var sw = ValueStopwatch.StartNew(); // Clear state in external storage diff --git a/test/Grains/TestGrainInterfaces/IActivityGrain.cs b/test/Grains/TestGrainInterfaces/IActivityGrain.cs index 3bd0f5e11f9..6aef64f893f 100644 --- a/test/Grains/TestGrainInterfaces/IActivityGrain.cs +++ b/test/Grains/TestGrainInterfaces/IActivityGrain.cs @@ -5,6 +5,21 @@ public interface IActivityGrain : IGrainWithIntegerKey Task GetActivityId(); } + /// + /// Grain interface for testing IAsyncEnumerable activity tracing. + /// + public interface IAsyncEnumerableActivityGrain : IGrainWithIntegerKey + { + /// + /// Gets multiple ActivityData items as an async enumerable. + /// Each item captures the current Activity context at the time of yield. + /// + /// Number of items to yield. + /// Cancellation token. + /// An async enumerable of ActivityData items. 
+ IAsyncEnumerable GetActivityDataStream(int count, CancellationToken cancellationToken = default); + } + [GenerateSerializer] public class ActivityData { diff --git a/test/Grains/TestGrains/ActivityGrain.cs b/test/Grains/TestGrains/ActivityGrain.cs index b3f7d75cda4..54bdbf01994 100644 --- a/test/Grains/TestGrains/ActivityGrain.cs +++ b/test/Grains/TestGrains/ActivityGrain.cs @@ -1,4 +1,5 @@ using System.Diagnostics; +using System.Runtime.CompilerServices; using UnitTests.GrainInterfaces; namespace UnitTests.Grains @@ -23,4 +24,33 @@ public Task GetActivityId() return Task.FromResult(result); } } + + /// + /// Grain implementation for testing IAsyncEnumerable activity tracing. + /// + public class AsyncEnumerableActivityGrain : Grain, IAsyncEnumerableActivityGrain + { + public async IAsyncEnumerable GetActivityDataStream(int count, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + for (int i = 0; i < count; i++) + { + cancellationToken.ThrowIfCancellationRequested(); + + var activity = Activity.Current; + var data = activity is null + ? 
new ActivityData() + : new ActivityData + { + Id = activity.Id, + TraceState = activity.TraceStateString, + Baggage = activity.Baggage.ToList(), + }; + + yield return data; + + // Yield between items so the rest of the enumeration resumes asynchronously (Task.Yield reschedules the continuation; it does not wait for a time interval) + await Task.Yield(); + } + } + } } diff --git a/test/Tester/ActivationTracingTests.cs b/test/Tester/ActivationTracingTests.cs new file mode 100644 index 00000000000..cec44b1023f --- /dev/null +++ b/test/Tester/ActivationTracingTests.cs @@ -0,0 +1,3775 @@ +using System.Collections.Concurrent; +using System.Diagnostics; +using System.Text; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Orleans; +using Orleans.Core.Internal; +using Orleans.Diagnostics; +using Orleans.Placement; +using Orleans.Runtime.Placement; +using Orleans.Storage; +using Orleans.TestingHost; +using TestExtensions; +using UnitTests.GrainInterfaces; +using Xunit; +using Xunit.Abstractions; + +namespace UnitTests.General +{ + /// + /// Tests for the tracing activities (spans) emitted around grain activation, placement, storage, migration, deactivation and IAsyncEnumerable calls. + /// A static ActivityListener records started activities from the application, lifecycle and storage sources for the tests to assert on.
+ /// + [Collection("ActivationTracing")] + public class ActivationTracingTests : OrleansTestingBase, IClassFixture + { + private static readonly ConcurrentBag Started = new(); + + static ActivationTracingTests() + { + var listener = new ActivityListener + { + ShouldListenTo = src => src.Name == ActivitySources.ApplicationGrainActivitySourceName + || src.Name == ActivitySources.LifecycleActivitySourceName + || src.Name == ActivitySources.StorageActivitySourceName, + Sample = (ref _) => ActivitySamplingResult.AllData, + SampleUsingParentId = (ref _) => ActivitySamplingResult.AllData, + ActivityStarted = activity => Started.Add(activity), + }; + ActivitySource.AddActivityListener(listener); + } + + public class Fixture : BaseTestClusterFixture + { + protected override void ConfigureTestCluster(TestClusterBuilder builder) + { + builder.Options.InitialSilosCount = 2; // Need 2 silos for migration tests + builder.ConfigureHostConfiguration(TestDefaultConfiguration.ConfigureHostConfiguration); + builder.AddSiloBuilderConfigurator(); + builder.AddClientBuilderConfigurator(); + } + + private class SiloCfg : ISiloConfigurator + { +#pragma warning disable ORLEANSEXP003 // Type is for evaluation purposes only and is subject to change or removal in future updates. 
+ public void Configure(ISiloBuilder hostBuilder) + { + hostBuilder + .AddActivityPropagation() + .AddDistributedGrainDirectory() + .AddMemoryGrainStorageAsDefault() + .AddMemoryGrainStorage("PubSubStore") + .AddIncomingGrainCallFilter(); + hostBuilder.Services.AddPlacementFilter(ServiceLifetime.Singleton); + hostBuilder.Services.AddPlacementFilter(ServiceLifetime.Singleton); + } +#pragma warning restore ORLEANSEXP003 + } + + private class ClientCfg : IClientBuilderConfigurator + { + public void Configure(IConfiguration configuration, IClientBuilder clientBuilder) + { + clientBuilder.AddActivityPropagation(); + } + } + } + + private readonly Fixture _fixture; + private readonly ITestOutputHelper _output; + + public ActivationTracingTests(Fixture fixture, ITestOutputHelper output) + { + _fixture = fixture; + _output = output; + } + + [Fact] + [TestCategory("BVT")] + public async Task ActivationSpanIsCreatedOnFirstCall() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + // First call should force activation + _ = await grain.GetActivityId(); + + // Expect at least one activation-related activity + var activationActivities = Started.Where(a => a.Source.Name == ActivitySources.LifecycleActivitySourceName).ToList(); + Assert.True(activationActivities.Count > 0, "Expected activation tracing activity to be created, but none were observed."); + + // Verify all expected spans are present and properly parented under test-parent + var testParentTraceId = parent.TraceId.ToString(); + + // Find the placement span - should be parented to the grain call which is parented to test-parent + var placementSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.PlaceGrain); + Assert.NotNull(placementSpan); + Assert.Equal(testParentTraceId, placementSpan.TraceId.ToString()); + + // Find the placement filter span - 
none is expected in this test (asserted null below) + var placementFilterSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.FilterPlacementCandidates); + Assert.Null(placementFilterSpan); + + // Find the activation span - it must carry the same trace ID as test-parent + var activationSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain); + Assert.NotNull(activationSpan); + Assert.Equal(testParentTraceId, activationSpan.TraceId.ToString()); + + // Look for an OnActivate span - none is expected in this test (asserted null below) + var onActivateSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.OnActivate); + Assert.Null(onActivateSpan); + + // Find the directory register span - should be parented to activation span + var directoryRegisterSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.RegisterDirectoryEntry); + Assert.NotNull(directoryRegisterSpan); + Assert.Equal(testParentTraceId, directoryRegisterSpan.TraceId.ToString()); + Assert.Equal(activationSpan.SpanId.ToString(), directoryRegisterSpan.ParentSpanId.ToString()); + } + finally + { + parent.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + [Fact] + [TestCategory("BVT")] + public async Task ActivationSpanIncludesFilter() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-filter"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + // First call should force activation + _ = await grain.GetActivityId(); + + // Expect at least one activation-related activity + var activationActivities = Started.Where(a => a.Source.Name == ActivitySources.LifecycleActivitySourceName).ToList(); + Assert.True(activationActivities.Count > 0, "Expected activation tracing activity to be created, but none were observed."); + + // Verify all expected spans are present and properly parented
under test-parent + var testParentTraceId = parent.TraceId.ToString(); + + // Find the placement span - should be parented to the grain call which is parented to test-parent + var placementSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.PlaceGrain); + Assert.NotNull(placementSpan); + Assert.Equal(testParentTraceId, placementSpan.TraceId.ToString()); + + // Find the placement filter span - should share the same trace ID as test-parent + var placementFilterSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.FilterPlacementCandidates); + Assert.NotNull(placementFilterSpan); + Assert.Equal(testParentTraceId, placementFilterSpan.TraceId.ToString()); + Assert.Equal("TracingTestPlacementFilterStrategy", placementFilterSpan.Tags.FirstOrDefault(t => t.Key == "orleans.placement.filter.type").Value); + + // Find the activation span - should be parented to the grain call which is parented to test-parent + var activationSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain); + Assert.NotNull(activationSpan); + Assert.Equal(testParentTraceId, activationSpan.TraceId.ToString()); + + // Find the OnActivateAsync span - should be parented to the activation span + var onActivateSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.OnActivate); + Assert.NotNull(onActivateSpan); + Assert.Equal(testParentTraceId, onActivateSpan.TraceId.ToString()); + Assert.Equal(activationSpan.SpanId.ToString(), onActivateSpan.ParentSpanId.ToString()); + + // Find the directory register span - should be parented to activation span + var directoryRegisterSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.RegisterDirectoryEntry); + Assert.NotNull(directoryRegisterSpan); + Assert.Equal(testParentTraceId, directoryRegisterSpan.TraceId.ToString()); + Assert.Equal(activationSpan.SpanId.ToString(), directoryRegisterSpan.ParentSpanId.ToString()); + } + finally + { + parent.Stop(); + 
AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + [Fact] + [TestCategory("BVT")] + public async Task ActivationSpanIncludesMultipleFilters() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-multi-filter"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + // First call should force activation + _ = await grain.GetActivityId(); + + // Verify all expected spans are present and properly parented under test-parent + var testParentTraceId = parent.TraceId.ToString(); + var testParentSpanId = parent.SpanId.ToString(); + + // Find the placement span + var placementSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.PlaceGrain); + Assert.NotNull(placementSpan); + Assert.Equal(testParentTraceId, placementSpan.TraceId.ToString()); + + // Find ALL placement filter spans - should be 2 (one for each filter) + var placementFilterSpans = Started + .Where(a => a.OperationName == ActivityNames.FilterPlacementCandidates) + .OrderBy(a => a.StartTimeUtc) + .ToList(); + Assert.Equal(2, placementFilterSpans.Count); + + // Both filter spans should share the same trace ID as test-parent + foreach (var filterSpan in placementFilterSpans) + { + Assert.Equal(testParentTraceId, filterSpan.TraceId.ToString()); + // Each filter span should be parented directly to the PlaceGrain span + Assert.Equal(placementSpan.SpanId.ToString(), filterSpan.ParentSpanId.ToString()); + } + + // Verify that both filters were executed + var filterTypes = placementFilterSpans + .Select(span => span.Tags.FirstOrDefault(t => t.Key == "orleans.placement.filter.type").Value) + .ToHashSet(); + Assert.Contains("TracingTestPlacementFilterStrategy", filterTypes); + Assert.Contains("SecondTracingTestPlacementFilterStrategy", filterTypes); + } + finally + { + parent.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } 
+ } + + [Fact] + [TestCategory("BVT")] + public async Task PersistentStateReadSpanIsCreatedDuringActivation() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-storage"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + // First call should force activation which triggers state read + _ = await grain.GetActivityId(); + + // Expect at least one activation-related activity + var activationActivities = Started.Where(a => a.Source.Name == ActivitySources.LifecycleActivitySourceName).ToList(); + Assert.True(activationActivities.Count > 0, "Expected activation tracing activity to be created, but none were observed."); + + // Verify all expected spans are present and properly parented under test-parent + var testParentTraceId = parent.TraceId.ToString(); + + // Find the activation span of the persistent-state grain; the tag lookup uses FirstOrDefault so activities without a grain type tag are skipped instead of throwing + var activationSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain && a.Tags.FirstOrDefault(kv => kv.Key == "orleans.grain.type").Value == "persistentstateactivity"); + Assert.NotNull(activationSpan); + Assert.Equal(testParentTraceId, activationSpan.TraceId.ToString()); + + // Find the storage read span - should share the same trace ID as test-parent + var storageReadSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.StorageRead); + Assert.NotNull(storageReadSpan); + Assert.Equal(testParentTraceId, storageReadSpan.TraceId.ToString()); + + // Verify storage read span has expected tags + Assert.Equal("MemoryGrainStorage", storageReadSpan.Tags.FirstOrDefault(t => t.Key == "orleans.storage.provider").Value); + Assert.Equal("state", storageReadSpan.Tags.FirstOrDefault(t => t.Key == "orleans.storage.state.name").Value); + Assert.Equal("PersistentStateActivityGrainState", storageReadSpan.Tags.FirstOrDefault(t => t.Key == "orleans.storage.state.type").Value); + + // Verify the
grain ID tag is present + var grainIdTag = storageReadSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value; + Assert.NotNull(grainIdTag); + } + finally + { + parent.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that dehydrate and rehydrate spans are created during grain migration. + /// Verifies that the migration process creates proper tracing spans for both + /// dehydration (on the source silo) and rehydration (on the target silo). + /// + [Fact] + [TestCategory("BVT")] + public async Task MigrationSpansAreCreatedDuringGrainMigration() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-migration"); + parent?.Start(); + try + { + // Create a grain and set some state + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + var expectedState = Random.Shared.Next(); + await grain.SetState(expectedState); + var originalAddress = await grain.GetGrainAddress(); + var originalHost = originalAddress.SiloAddress; + + // Find a different silo to migrate to + var targetHost = _fixture.HostedCluster.GetActiveSilos() + .Select(s => s.SiloAddress) + .First(address => address != originalHost); + + // Trigger migration with a placement hint to coerce the placement director to use the target silo + RequestContext.Set(IPlacementDirector.PlacementHintKey, targetHost); + await grain.Cast().MigrateOnIdle(); + + // Verify the state was preserved (this also waits for migration to complete) + var newState = await grain.GetState(); + Assert.Equal(expectedState, newState); + + // Give some time for all activities to complete + await Task.Delay(500); + + var testParentTraceId = parent.TraceId.ToString(); + + // Verify dehydrate span was created + var dehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationDehydrate).ToList(); + Assert.True(dehydrateSpans.Count > 0, "Expected at least one dehydrate span to be 
created during migration"); + + var dehydrateSpan = dehydrateSpans.First(); + Assert.NotNull(dehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value); + Assert.NotNull(dehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.silo.id").Value); + Assert.NotNull(dehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.activation.id").Value); + // Verify target silo tag is present + Assert.Equal(targetHost.ToString(), dehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.migration.target.silo").Value); + // Verify dehydrate span is parented to the migration request trace + Assert.Equal(testParentTraceId, dehydrateSpan.TraceId.ToString()); + + // Verify rehydrate span was created on the target silo + var rehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationRehydrate).ToList(); + Assert.True(rehydrateSpans.Count > 0, "Expected at least one rehydrate span to be created during migration"); + + var rehydrateSpan = rehydrateSpans.First(); + Assert.NotNull(rehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value); + Assert.NotNull(rehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.silo.id").Value); + Assert.NotNull(rehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.activation.id").Value); + // Verify the rehydrate span has the previous registration tag + Assert.NotNull(rehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.rehydrate.previousRegistration").Value); + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that FilterPlacementCandidates spans are properly parented under a PlaceGrain span + /// when migration triggers placement via PlaceGrainAsync. + /// This covers the code path where PlaceGrainAsync (not the PlacementWorker message path) + /// calls a placement director which calls GetCompatibleSilos with filters. 
+ /// + [Fact] + [TestCategory("BVT")] + public async Task MigrationPlacementFilterSpanIsParentedUnderPlaceGrainSpan() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-migration-filter"); + parent?.Start(); + try + { + // Create a grain that has both a placement filter and migration support + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + var expectedState = Random.Shared.Next(); + await grain.SetState(expectedState); + var originalAddress = await grain.GetGrainAddress(); + var originalHost = originalAddress.SiloAddress; + + // Find a different silo to migrate to + var targetHost = _fixture.HostedCluster.GetActiveSilos() + .Select(s => s.SiloAddress) + .First(address => address != originalHost); + + // Clear activities to focus on migration placement + Started.Clear(); + + // Trigger migration with a placement hint + RequestContext.Set(IPlacementDirector.PlacementHintKey, targetHost); + await grain.Cast().MigrateOnIdle(); + + // Verify the state was preserved (this also waits for migration to complete) + var newState = await grain.GetState(); + Assert.Equal(expectedState, newState); + + // Give some time for all activities to complete + await Task.Delay(500); + + var testParentTraceId = parent.TraceId.ToString(); + + // Find the PlaceGrain span created during migration's PlaceGrainAsync call + var placementSpans = Started.Where(a => a.OperationName == ActivityNames.PlaceGrain).ToList(); + Assert.True(placementSpans.Count > 0, "Expected at least one PlaceGrain span during migration"); + + var placementSpan = placementSpans.First(); + Assert.Equal(testParentTraceId, placementSpan.TraceId.ToString()); + + // Find the FilterPlacementCandidates span - should share the same trace ID + var filterSpans = Started.Where(a => a.OperationName == ActivityNames.FilterPlacementCandidates).ToList(); + Assert.True(filterSpans.Count > 0, "Expected at least one FilterPlacementCandidates span during 
migration with filter"); + + var filterSpan = filterSpans.First(); + Assert.Equal(testParentTraceId, filterSpan.TraceId.ToString()); + Assert.Equal("TracingTestPlacementFilterStrategy", filterSpan.Tags.FirstOrDefault(t => t.Key == "orleans.placement.filter.type").Value); + + // The filter span should be a child of the PlaceGrain span + Assert.Equal(placementSpan.SpanId.ToString(), filterSpan.ParentSpanId.ToString()); + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests migration of a grain with persistent state: the grain must move to the target silo with its state intact. + /// Asserts that no dehydrate or rehydrate spans are recorded and that exactly two storage read spans are observed. + /// + [Fact] + [TestCategory("BVT")] + public async Task MigrationSpansAreCreatedForGrainWithPersistentState() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-migration-persistent"); + parent?.Start(); + try + { + // Create a grain with persistent state and set some state + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + var expectedStateA = Random.Shared.Next(); + var expectedStateB = Random.Shared.Next(); + await grain.SetState(expectedStateA, expectedStateB); + var originalAddress = await grain.GetGrainAddress(); + var originalHost = originalAddress.SiloAddress; + + // Find a different silo to migrate to + var targetHost = _fixture.HostedCluster.GetActiveSilos() + .Select(s => s.SiloAddress) + .First(address => address != originalHost); + + // Trigger migration with a placement hint + RequestContext.Set(IPlacementDirector.PlacementHintKey, targetHost); + await grain.Cast().MigrateOnIdle(); + + // Wait for migration to complete + GrainAddress newAddress; + do + { + await Task.Delay(100); + newAddress = await grain.GetGrainAddress(); + } while (newAddress.ActivationId == originalAddress.ActivationId); + + // Verify the grain
migrated to the target silo + Assert.Equal(targetHost, newAddress.SiloAddress); + + // Verify the state was preserved + var (actualA, actualB) = await grain.GetState(); + Assert.Equal(expectedStateA, actualA); + Assert.Equal(expectedStateB, actualB); + + // Give some time for all activities to complete + await Task.Delay(500); + + // Verify dehydrate span was NOT created (grain doesn't implement IGrainMigrationParticipant) + var dehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationDehydrate).ToList(); + Assert.True(dehydrateSpans.Count == 0, $"Expected no dehydrate spans for grain without IGrainMigrationParticipant, but found {dehydrateSpans.Count}"); + + // Verify rehydrate span was NOT created + var rehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationRehydrate).ToList(); + Assert.True(rehydrateSpans.Count == 0, $"Expected no rehydrate spans for grain without IGrainMigrationParticipant, but found {rehydrateSpans.Count}"); + + // Collect every storage read span recorded since the start of the test (Started is not cleared after the initial Clear) + // NOTE(review): earlier comments here said storage must not be read during migration, yet the assertion below expects exactly 2 reads - confirm which is intended + var storageReadSpansAfterMigration = Started.Where(a => a.OperationName == ActivityNames.StorageRead).ToList(); + // Exactly two storage read spans are expected over the whole test + // Presumably one per activation (the original and the one after migration) - TODO confirm + + Assert.Equal(2, storageReadSpansAfterMigration.Count); + } + finally + { + parent.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that dehydrate and rehydrate spans are NOT created during migration of a grain + /// that does not implement IGrainMigrationParticipant.
+ /// + [Fact] + [TestCategory("BVT")] + public async Task DehydrateAndRehydrateSpansAreNotCreatedForGrainWithoutMigrationParticipant() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-no-migration-participant"); + parent?.Start(); + try + { + // Create a grain that doesn't implement IGrainMigrationParticipant + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + var expectedState = Random.Shared.Next(); + await grain.SetState(expectedState); + var originalAddress = await grain.GetGrainAddress(); + var originalHost = originalAddress.SiloAddress; + + // Find a different silo to migrate to + var targetHost = _fixture.HostedCluster.GetActiveSilos() + .Select(s => s.SiloAddress) + .First(address => address != originalHost); + + // Trigger migration with a placement hint to coerce the placement director to use the target silo + RequestContext.Set(IPlacementDirector.PlacementHintKey, targetHost); + await grain.Cast().MigrateOnIdle(); + + // Make a call to ensure grain is activated on target silo + // Note: State won't be preserved since grain doesn't participate in migration + _ = await grain.GetState(); + + // Give some time for all activities to complete + await Task.Delay(500); + + // Verify dehydrate span was NOT created (grain doesn't implement IGrainMigrationParticipant) + var dehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationDehydrate).ToList(); + Assert.True(dehydrateSpans.Count == 0, $"Expected no dehydrate spans for grain without IGrainMigrationParticipant, but found {dehydrateSpans.Count}"); + + // Verify rehydrate span was NOT created + var rehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationRehydrate).ToList(); + Assert.True(rehydrateSpans.Count == 0, $"Expected no rehydrate spans for grain without IGrainMigrationParticipant, but found {rehydrateSpans.Count}"); + + // Verify that activation span WAS created (the grain was 
still activated on the new silo) + var activationSpans = Started.Where(a => a.OperationName == ActivityNames.ActivateGrain).ToList(); + Assert.True(activationSpans.Count > 0, "Expected at least one activation span for the migrated grain"); + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that appropriate tracing spans are created for IAsyncEnumerable grain calls with multiple elements. + /// Verifies that: + /// 1. A session span is created with the original method name (GetActivityDataStream) + /// 2. StartEnumeration, MoveNext, and DisposeAsync spans are nested under the session span + /// 3. All spans share the same trace context + /// + [Fact] + [TestCategory("BVT")] + public async Task AsyncEnumerableSpansAreCreatedForMultipleElements() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-async-enumerable"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + const int elementCount = 5; + + var values = new List(); + await foreach (var entry in grain.GetActivityDataStream(elementCount).WithBatchSize(1)) + { + values.Add(entry); + } + + // Verify we received all elements + Assert.Equal(elementCount, values.Count); + + // Verify all expected spans are present and properly parented under test-parent + var testParentTraceId = parent.TraceId.ToString(); + var testParentSpanId = parent.SpanId.ToString(); + + // Find all activities with the ApplicationGrainActivitySourceName + var applicationSpans = Started.Where(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName).ToList(); + + // Find the session span (the logical method call span) + // This should have the method name from the grain interface (e.g., "IAsyncEnumerableActivityGrain/GetActivityDataStream") + var sessionSpans = applicationSpans.Where(a => 
a.OperationName.Contains("GetActivityDataStream")).ToList(); + Assert.True(sessionSpans.Count >= 1, "Expected at least one session span with GetActivityDataStream operation name"); + + var sessionSpan = sessionSpans.First(); + Assert.Equal(testParentTraceId, sessionSpan.TraceId.ToString()); + Assert.Equal(testParentSpanId, sessionSpan.ParentSpanId.ToString()); + + // Verify the session span has the request ID tag + var requestIdTag = sessionSpan.Tags.FirstOrDefault(t => t.Key == "orleans.async_enumerable.request_id").Value; + Assert.NotNull(requestIdTag); + + var sessionSpanId = sessionSpan.SpanId.ToString(); + + // Find all spans (including runtime spans) to verify parenting + var allSpans = Started.ToList(); + + // Find the StartEnumeration span - should be nested under the session span (in RuntimeActivitySourceName) + // Filter to only client-side spans (those directly parented to the session span) + var startEnumerationSpans = allSpans + .Where(a => a.OperationName.Contains("StartEnumeration") && a.ParentSpanId.ToString() == sessionSpanId) + .ToList(); + Assert.True(startEnumerationSpans.Count >= 1, "Expected at least one StartEnumeration span parented to session span"); + + var startEnumerationSpan = startEnumerationSpans.First(); + Assert.Equal(testParentTraceId, startEnumerationSpan.TraceId.ToString()); + + // Find MoveNext spans - should be nested under the session span (in RuntimeActivitySourceName) + // Filter to only client-side spans (those directly parented to the session span) + var moveNextSpans = allSpans + .Where(a => a.OperationName.Contains("MoveNext") && a.ParentSpanId.ToString() == sessionSpanId) + .ToList(); + Assert.True(moveNextSpans.Count >= 1, $"Expected at least one MoveNext span parented to session span, found {moveNextSpans.Count}"); + + // All client-side MoveNext spans should share the same trace ID + foreach (var moveNextSpan in moveNextSpans) + { + Assert.Equal(testParentTraceId, moveNextSpan.TraceId.ToString()); + } + + // Find 
DisposeAsync span - should be nested under the session span (in RuntimeActivitySourceName) + // Filter to only client-side spans (those directly parented to the session span) + var disposeSpans = allSpans + .Where(a => a.OperationName.Contains("DisposeAsync") && a.ParentSpanId.ToString() == sessionSpanId) + .ToList(); + Assert.True(disposeSpans.Count >= 1, "Expected at least one DisposeAsync span parented to session span"); + + var disposeSpan = disposeSpans.First(); + Assert.Equal(testParentTraceId, disposeSpan.TraceId.ToString()); + + // Verify each ActivityData received has activity information + // (verifying trace context was propagated into the grain during enumeration) + foreach (var activityData in values) + { + Assert.NotNull(activityData); + Assert.NotNull(activityData.Id); + } + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that OnDeactivateAsync span is created when a grain is deactivated via DeactivateOnIdle. + /// Verifies that the span has proper tags including grain ID, type, silo ID, and deactivation reason. 
+ /// + [Fact] + [TestCategory("BVT")] + public async Task OnDeactivateSpanIsCreatedOnDeactivateOnIdle() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-deactivate"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + + // First call should force activation + _ = await grain.GetActivityId(); + + // Capture the parent trace ID up front; it is compared against the OnDeactivate span at the end of the test + var testParentTraceId = parent.TraceId.ToString(); + + // Clear activities to focus on deactivation + Started.Clear(); + + // Trigger deactivation + await grain.TriggerDeactivation(); + + // Give the deactivation a fixed amount of time to complete before calling the grain again + await Task.Delay(500); + + // Make another call to force a new activation (confirming the previous one was deactivated) + _ = await grain.GetActivityId(); + + // Find the OnDeactivate span + var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList(); + Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during deactivation"); + + var onDeactivateSpan = onDeactivateSpans.First(); + + // Verify the span has expected tags + Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value); + Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.type").Value); + Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.silo.id").Value); + Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.activation.id").Value); + + // Verify deactivation reason tag + var deactivationReasonTag = onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value; + Assert.NotNull(deactivationReasonTag); + Assert.Contains("ApplicationRequested", deactivationReasonTag); + + // Verify the OnDeactivate span shares the same
trace ID as the parent activity + // This confirms the activity context was propagated from the TriggerDeactivation call + Assert.Equal(testParentTraceId, onDeactivateSpan.TraceId.ToString()); + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that OnDeactivateAsync span captures state writes performed during deactivation. + /// Verifies that storage operations during OnDeactivateAsync are properly traced. + /// + [Fact] + [TestCategory("BVT")] + public async Task OnDeactivateSpanIncludesStorageWriteDuringDeactivation() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-deactivate-storage"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + + // First call should force activation + _ = await grain.GetActivityId(); + + // Clear activities to focus on deactivation + Started.Clear(); + + // Trigger deactivation + await grain.TriggerDeactivation(); + + // Wait for deactivation to complete + await Task.Delay(500); + + // Make another call to force a new activation (confirming the previous one was deactivated) + var wasDeactivated = await grain.WasDeactivated(); + Assert.True(wasDeactivated, "Expected grain to have been deactivated"); + + // Find the OnDeactivate span + var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList(); + Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during deactivation"); + + // Find storage write span - should have been created during OnDeactivateAsync + var storageWriteSpans = Started.Where(a => a.OperationName == ActivityNames.StorageWrite).ToList(); + Assert.True(storageWriteSpans.Count > 0, "Expected at least one storage write span to be created during OnDeactivateAsync"); + + var storageWriteSpan = storageWriteSpans.First(); + 
Assert.Equal("MemoryGrainStorage", storageWriteSpan.Tags.FirstOrDefault(t => t.Key == "orleans.storage.provider").Value); + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that OnDeactivateAsync span captures exceptions thrown during deactivation. + /// Verifies that the span's error status is set and the exception event is recorded. + /// + [Fact] + [TestCategory("BVT")] + public async Task OnDeactivateSpanCapturesExceptionDuringDeactivation() + { + Started.Clear(); + + using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-deactivate-exception"); + parent?.Start(); + try + { + var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next()); + + // First call should force activation + _ = await grain.GetActivityId(); + + // Clear activities to focus on deactivation + Started.Clear(); + + // Trigger deactivation (grain throws exception in OnDeactivateAsync) + await grain.TriggerDeactivation(); + + // Wait for deactivation to complete + await Task.Delay(500); + + // Make another call to force a new activation + _ = await grain.GetActivityId(); + + // Find the OnDeactivate span + var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList(); + Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during deactivation"); + + var onDeactivateSpan = onDeactivateSpans.First(); + + // Verify the span captured the error + Assert.Equal(ActivityStatusCode.Error, onDeactivateSpan.Status); + + // Verify the span captured the error + Assert.Equal("on-deactivate-failed", onDeactivateSpan.StatusDescription); + } + finally + { + parent?.Stop(); + AssertNoApplicationSpansParentedByRuntimeSpans(); + PrintActivityDiagnostics(); + } + } + + /// + /// Tests that OnDeactivateAsync span is created during migration and precedes the dehydration span. 
+        /// Verifies the correct ordering: OnDeactivateAsync -> Dehydrate during migration.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanPrecedesDehydrateDuringMigration()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity (or null), so no extra Start() call is needed.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-deactivate-migrate");
+            try
+            {
+                // Create a grain and set some state
+                var grain = _fixture.GrainFactory.GetGrain<IDeactivationMigrationTracingTestGrain>(Random.Shared.Next());
+                var expectedState = Random.Shared.Next();
+                await grain.SetState(expectedState);
+                var originalAddress = await grain.GetGrainAddress();
+                var originalHost = originalAddress.SiloAddress;
+
+                // Find a different silo to migrate to
+                var targetHost = _fixture.HostedCluster.GetActiveSilos()
+                    .Select(s => s.SiloAddress)
+                    .First(address => address != originalHost);
+
+                // Clear activities to focus on deactivation/migration
+                Started.Clear();
+
+                // Trigger migration with a placement hint
+                RequestContext.Set(IPlacementDirector.PlacementHintKey, targetHost);
+                await grain.Cast<IGrainManagementExtension>().MigrateOnIdle();
+
+                // Wait for migration to complete, but fail instead of hanging if the activation never changes
+                var migrationWait = Stopwatch.StartNew();
+                GrainAddress newAddress;
+                do
+                {
+                    Assert.True(migrationWait.Elapsed < TimeSpan.FromSeconds(30), "Timed out waiting for the grain to migrate");
+                    await Task.Delay(100);
+                    newAddress = await grain.GetGrainAddress();
+                } while (newAddress.ActivationId == originalAddress.ActivationId);
+
+                // Verify the state was preserved
+                var newState = await grain.GetState();
+                Assert.Equal(expectedState, newState);
+
+                // Give some time for all activities to complete
+                await Task.Delay(500);
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span during migration");
+
+                // Find the dehydrate span
+                var dehydrateSpans = Started.Where(a => a.OperationName == ActivityNames.ActivationDehydrate).ToList();
+                Assert.True(dehydrateSpans.Count > 0, "Expected at least one dehydrate span during migration");
+
+                // Verify OnDeactivate started before Dehydrate (as per the FinishDeactivating flow)
+                var onDeactivateSpan = onDeactivateSpans[0];
+                var dehydrateSpan = dehydrateSpans[0];
+
+                Assert.True(onDeactivateSpan.StartTimeUtc <= dehydrateSpan.StartTimeUtc,
+                    "OnDeactivateAsync should start before or at the same time as Dehydrate");
+
+                // Verify both spans have proper tags
+                Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value);
+                Assert.Contains("Migrating", onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value);
+
+                Assert.NotNull(dehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value);
+                Assert.Equal(targetHost.ToString(), dehydrateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.migration.target.silo").Value);
+            }
+            finally
+            {
+                parent?.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        ///
+        /// Tests that OnDeactivateAsync span is NOT created for grains that don't implement IGrainBase.
+        /// Verifies that only grains with OnDeactivateAsync implementation get the span.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanIsNotCreatedForNonGrainBaseGrain()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity (or null), so no extra Start() call is needed.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-no-deactivate");
+            try
+            {
+                // Use a simple grain that doesn't override OnDeactivateAsync (IActivityGrain/ActivityGrain)
+                var grain = _fixture.GrainFactory.GetGrain<IActivityGrain>(Random.Shared.Next());
+
+                // First call should force activation
+                _ = await grain.GetActivityId();
+
+                // Clear activities to focus on deactivation
+                Started.Clear();
+
+                // Trigger deactivation via IGrainManagementExtension
+                await grain.Cast<IGrainManagementExtension>().DeactivateOnIdle();
+
+                // Wait for deactivation to complete
+                await Task.Delay(500);
+
+                // Make another call to force a new activation
+                _ = await grain.GetActivityId();
+
+                // For grains that don't inherit from Grain (which implements IGrainBase),
+                // OnDeactivateAsync won't be called, so no span should be created
+                // Note: ActivityGrain doesn't inherit from Grain, it implements IActivityGrain directly
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+
+                Assert.True(onDeactivateSpans.Count == 0,
+                    $"Expected no OnDeactivate spans for grain not implementing IGrainBase, but found {onDeactivateSpans.Count}");
+            }
+            finally
+            {
+                parent?.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        ///
+        /// Tests that OnDeactivateAsync span properly inherits the trace context from the triggering call.
+        /// Verifies that when deactivation is triggered, the OnDeactivate span has the same TraceId as the request.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanInheritsTraceContextFromTriggeringCall()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, or null when nothing is listening,
+            // so no extra Start() call is made and the null case fails here with a clear message.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-trace-context");
+            Assert.NotNull(parent);
+            var testParentTraceId = parent.TraceId.ToString();
+            try
+            {
+                // NOTE(review): the generic type argument was stripped from this copy of the patch;
+                // IDeactivationTracingTestGrain is assumed from the methods called below - confirm.
+                var grain = _fixture.GrainFactory.GetGrain<IDeactivationTracingTestGrain>(Random.Shared.Next());
+
+                // First call should force activation
+                _ = await grain.GetActivityId();
+
+                // Trigger deactivation - this call's activity context should be propagated to OnDeactivate
+                await grain.TriggerDeactivation();
+
+                // Wait for deactivation to complete
+                await Task.Delay(500);
+
+                // Make another call to force a new activation
+                _ = await grain.GetActivityId();
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span");
+
+                var onDeactivateSpan = onDeactivateSpans[0];
+
+                // Verify the OnDeactivate span shares the same trace ID as the parent activity
+                // This confirms trace context propagation works correctly
+                Assert.Equal(testParentTraceId, onDeactivateSpan.TraceId.ToString());
+            }
+            finally
+            {
+                parent.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        ///
+        /// Tests that OnDeactivateAsync span is created during IAsyncEnumerable method execution when the grain calls DeactivateOnIdle.
+        /// Verifies that the OnDeactivate span is properly parented to the method call (session) span.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanIsParentedToAsyncEnumerableMethodCall()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, or null when nothing is listening,
+            // so no extra Start() call is made and the null case fails here with a clear message.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-async-enum-deactivate");
+            Assert.NotNull(parent);
+            var testParentTraceId = parent.TraceId.ToString();
+            try
+            {
+                // NOTE(review): the generic type argument of GetGrain was stripped from this copy of the patch and the
+                // interface declaring GetValuesAndDeactivate is not visible here - restore it from the original change.
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                const int elementCount = 3;
+
+                // Only the number of received elements is asserted, so count them instead of collecting them.
+                var receivedCount = 0;
+                await foreach (var _ in grain.GetValuesAndDeactivate(elementCount).WithBatchSize(1))
+                {
+                    receivedCount++;
+                }
+
+                // Verify we received all elements
+                Assert.Equal(elementCount, receivedCount);
+
+                // Wait for deactivation to complete
+                await Task.Delay(1000);
+
+                // Make another call to force a new activation (confirming the previous one was deactivated)
+                _ = await grain.GetActivityId();
+
+                // Find the session span (the logical method call span)
+                var sessionSpans = Started
+                    .Where(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
+                        && a.OperationName.Contains("GetValuesAndDeactivate"))
+                    .ToList();
+                Assert.True(sessionSpans.Count >= 1, "Expected at least one session span with GetValuesAndDeactivate operation name");
+
+                var sessionSpan = sessionSpans[0];
+                Assert.Equal(testParentTraceId, sessionSpan.TraceId.ToString());
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during enumeration");
+
+                var onDeactivateSpan = onDeactivateSpans[0];
+
+                // Verify the OnDeactivate span shares the same trace ID as the parent activity
+                Assert.Equal(testParentTraceId, onDeactivateSpan.TraceId.ToString());
+
+                // Verify the OnDeactivate span belongs to the session span's trace.
+                // Note: The OnDeactivate might be a descendant (not direct child) of the session span,
+                // so only trace membership is asserted here, not direct parentage.
+                Assert.Equal(sessionSpan.TraceId, onDeactivateSpan.TraceId);
+
+                // Verify deactivation reason tag
+                var deactivationReasonTag = onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value;
+                Assert.NotNull(deactivationReasonTag);
+                Assert.Contains("ApplicationRequested", deactivationReasonTag);
+            }
+            finally
+            {
+                parent.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        /// <summary>
+        /// Tests that OnDeactivateAsync span captures the proper deactivation reason when deactivation is caused by migration.
+        /// Verifies that the deactivation reason tag reflects the actual reason for deactivation.
+        /// </summary>
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanHasCorrectReasonTagForMigration()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, or null when nothing is listening,
+            // so no extra Start() call is made and the null case fails here with a clear message.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-reason-migration");
+            Assert.NotNull(parent);
+            var testParentTraceId = parent.TraceId.ToString();
+            try
+            {
+                // Create a grain and set some state
+                var grain = _fixture.GrainFactory.GetGrain<IDeactivationMigrationTracingTestGrain>(Random.Shared.Next());
+                await grain.SetState(42);
+                var originalAddress = await grain.GetGrainAddress();
+                var originalHost = originalAddress.SiloAddress;
+
+                // Find a different silo to migrate to
+                var targetHost = _fixture.HostedCluster.GetActiveSilos()
+                    .Select(s => s.SiloAddress)
+                    .First(address => address != originalHost);
+
+                // Clear activities to focus on deactivation/migration
+                Started.Clear();
+
+                // Trigger migration
+                RequestContext.Set(IPlacementDirector.PlacementHintKey, targetHost);
+                await grain.Cast<IGrainManagementExtension>().MigrateOnIdle();
+
+                // Wait for migration to complete, but fail instead of hanging if the activation never changes
+                var migrationWait = Stopwatch.StartNew();
+                GrainAddress newAddress;
+                do
+                {
+                    Assert.True(migrationWait.Elapsed < TimeSpan.FromSeconds(30), "Timed out waiting for the grain to migrate");
+                    await Task.Delay(100);
+                    newAddress = await grain.GetGrainAddress();
+                } while (newAddress.ActivationId == originalAddress.ActivationId);
+
+                // Give some time for all activities to complete
+                await Task.Delay(500);
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span during migration");
+
+                var onDeactivateSpan = onDeactivateSpans[0];
+
+                // Verify the deactivation reason tag indicates migration
+                var deactivationReasonTag = onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value;
+                Assert.NotNull(deactivationReasonTag);
+                Assert.Contains("Migrating", deactivationReasonTag);
+
+                // Verify the OnDeactivate span shares the same trace ID as the parent activity
+                Assert.Equal(testParentTraceId, onDeactivateSpan.TraceId.ToString());
+            }
+            finally
+            {
+                parent.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        /// <summary>
+        /// Tests that OnDeactivateAsync span is created when a grain throws InconsistentStateException
+        /// and gets deactivated with ApplicationError reason.
+        /// </summary>
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanIsCreatedForInconsistentStateException()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity (or null), so no extra Start() call is needed.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-inconsistent-state");
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain<IInconsistentStateDeactivationGrain>(Random.Shared.Next());
+
+                // First call should force activation
+                _ = await grain.GetActivityId();
+
+                // This call will throw InconsistentStateException and trigger deactivation
+                try
+                {
+                    await grain.ThrowInconsistentStateException();
+                }
+                catch (InconsistentStateException)
+                {
+                    // Expected
+                }
+
+                // Wait for deactivation to complete
+                await Task.Delay(500);
+
+                // Make another call to force a new activation (confirming the previous one was deactivated)
+                _ = await grain.GetActivityId();
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during deactivation");
+
+                var onDeactivateSpan = onDeactivateSpans[0];
+
+                // Verify the span has expected tags
+                Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value);
+                Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.type").Value);
+
+                // Verify deactivation reason tag indicates ApplicationError
+                var deactivationReasonTag = onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value;
+                Assert.NotNull(deactivationReasonTag);
+                Assert.Contains("ApplicationError", deactivationReasonTag);
+
+                // Verify the OnDeactivate span has a valid trace ID
+                // Note: The trace ID may or may not match our parent activity depending on timing,
+                // but it should be valid and propagated from somewhere in the call chain
+                Assert.NotEqual(default(ActivityTraceId).ToString(), onDeactivateSpan.TraceId.ToString());
+            }
+            finally
+            {
+                parent?.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        ///
+        /// Tests that OnDeactivateAsync span is NOT created when a grain fails during activation (PreviousState != Valid).
+        /// The OnDeactivate span should only be created when the grain was previously in Valid state.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanIsNotCreatedForActivationFailure()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity (or null), so no extra Start() call is needed.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-activation-failure");
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain<IActivationFailureDeactivationGrain>(Random.Shared.Next());
+
+                // Clear activities to focus on activation/deactivation
+                Started.Clear();
+
+                // First call should trigger activation which will fail. Require the failure explicitly:
+                // the rest of the test is meaningless if the activation unexpectedly succeeds.
+                await Assert.ThrowsAnyAsync<Exception>(() => grain.GetActivityId());
+
+                // Wait for any deactivation to complete
+                await Task.Delay(5000);
+
+                // Find the OnDeactivate span - should NOT exist because the grain was never in Valid state
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count == 0,
+                    $"Expected no OnDeactivate spans for grain that failed during activation, but found {onDeactivateSpans.Count}");
+
+                // Verify the activation span was created
+                var activationSpans = Started.Where(a => a.OperationName == ActivityNames.ActivateGrain).ToList();
+                Assert.True(activationSpans.Count > 0, "Expected at least one activation span");
+            }
+            finally
+            {
+                parent?.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        ///
+        /// Tests that OnDeactivateAsync span is created when a grain deactivates itself using GrainContext.Deactivate.
+        /// This tests the programmatic deactivation path within the grain.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanIsCreatedForGrainContextDeactivate()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, or null when nothing is listening,
+            // so no extra Start() call is made and the null case fails here with a clear message.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-grain-context-deactivate");
+            Assert.NotNull(parent);
+            var testParentTraceId = parent.TraceId.ToString();
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain<IGrainContextDeactivationGrain>(Random.Shared.Next());
+
+                // First call should force activation
+                _ = await grain.GetActivityId();
+
+                // Clear activities to focus on deactivation
+                Started.Clear();
+
+                // Trigger deactivation using GrainContext.Deactivate with custom reason
+                await grain.DeactivateWithCustomReason("Custom deactivation reason for testing");
+
+                // Wait for deactivation to complete
+                await Task.Delay(500);
+
+                // Make another call to force a new activation (confirming the previous one was deactivated)
+                _ = await grain.GetActivityId();
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during deactivation");
+
+                var onDeactivateSpan = onDeactivateSpans[0];
+
+                // Verify the span has expected tags
+                Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.id").Value);
+                Assert.NotNull(onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.grain.type").Value);
+
+                // Verify deactivation reason tag indicates ApplicationRequested with custom message
+                var deactivationReasonTag = onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value;
+                Assert.NotNull(deactivationReasonTag);
+                Assert.Contains("ApplicationRequested", deactivationReasonTag);
+                Assert.Contains("Custom deactivation reason for testing", deactivationReasonTag);
+
+                // Verify the OnDeactivate span shares the same trace ID as the parent activity
+                Assert.Equal(testParentTraceId, onDeactivateSpan.TraceId.ToString());
+            }
+            finally
+            {
+                parent.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        /// <summary>
+        /// Tests that the OnDeactivate span properly captures the activity context when deactivation
+        /// is triggered externally via IGrainManagementExtension.DeactivateOnIdle.
+        /// </summary>
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task OnDeactivateSpanHasCorrectParentWhenTriggeredExternally()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, or null when nothing is listening,
+            // so no extra Start() call is made and the null case fails here with a clear message.
+            using var parent = ActivitySources.ApplicationGrainSource.StartActivity("test-parent-external-deactivate");
+            Assert.NotNull(parent);
+            var testParentTraceId = parent.TraceId.ToString();
+            try
+            {
+                // NOTE(review): the generic type argument was stripped from this copy of the patch;
+                // IDeactivationTracingTestGrain is assumed from the methods called below - confirm.
+                var grain = _fixture.GrainFactory.GetGrain<IDeactivationTracingTestGrain>(Random.Shared.Next());
+
+                // First call should force activation
+                _ = await grain.GetActivityId();
+
+                // Clear activities to focus on deactivation
+                Started.Clear();
+
+                // Trigger deactivation externally via IGrainManagementExtension
+                await grain.Cast<IGrainManagementExtension>().DeactivateOnIdle();
+
+                // Wait for deactivation to complete
+                await Task.Delay(500);
+
+                // Make another call to force a new activation (confirming the previous one was deactivated)
+                _ = await grain.GetActivityId();
+
+                // Find the OnDeactivate span
+                var onDeactivateSpans = Started.Where(a => a.OperationName == ActivityNames.OnDeactivate).ToList();
+                Assert.True(onDeactivateSpans.Count > 0, "Expected at least one OnDeactivate span to be created during deactivation");
+
+                var onDeactivateSpan = onDeactivateSpans[0];
+
+                // Verify the OnDeactivate span shares the same trace ID as the parent activity
+                // This confirms the activity context was propagated from the DeactivateOnIdle call
+                Assert.Equal(testParentTraceId, onDeactivateSpan.TraceId.ToString());
+
+                // Verify deactivation reason tag
+                var deactivationReasonTag = onDeactivateSpan.Tags.FirstOrDefault(t => t.Key == "orleans.deactivation.reason").Value;
+                Assert.NotNull(deactivationReasonTag);
+                Assert.Contains("ApplicationRequested", deactivationReasonTag);
+            }
+            finally
+            {
+                parent.Stop();
+
+                // Print before asserting so the captured activities are still reported when the assertion fails.
+                PrintActivityDiagnostics();
+                AssertNoApplicationSpansParentedByRuntimeSpans();
+            }
+        }
+
+        /// <summary>
+        /// Asserts that no spans from ApplicationGrainActivitySourceName have parents from RuntimeActivitySourceName.
+        /// This ensures that if only ApplicationGrainActivitySourceName has been added (without RuntimeActivitySourceName),
+        /// there won't be any hanging traces put at root because of missing RuntimeActivitySourceName spans
+        /// that would otherwise propagate the trace context.
+        /// </summary>
+        private void AssertNoApplicationSpansParentedByRuntimeSpans()
+        {
+            // Work on a snapshot of the captured activities.
+            var snapshot = Started.ToList();
+            var byId = snapshot
+                .Where(a => a.Id is not null)
+                .ToDictionary(a => a.Id!);
+
+            var violations = new List<(Activity Child, Activity Parent)>();
+            foreach (var candidate in snapshot)
+            {
+                // Only application spans whose parent was also captured are relevant.
+                if (candidate.Source.Name != ActivitySources.ApplicationGrainActivitySourceName
+                    || candidate.ParentId is null
+                    || !byId.TryGetValue(candidate.ParentId, out var parentActivity))
+                {
+                    continue;
+                }
+
+                if (parentActivity.Source.Name == ActivitySources.RuntimeActivitySourceName)
+                {
+                    violations.Add((candidate, parentActivity));
+                }
+            }
+
+            if (violations.Count == 0)
+            {
+                return;
+            }
+
+            var report = new StringBuilder();
+            report.AppendLine($"Found {violations.Count} ApplicationGrainActivitySourceName span(s) with RuntimeActivitySourceName parent(s):");
+            foreach (var (child, violationParent) in violations)
+            {
+                report.AppendLine($" - Application span '{child.OperationName}' (Id: {child.Id}) has Runtime parent '{violationParent.OperationName}' (Id: {violationParent.Id})");
+            }
+
+            Assert.Fail(report.ToString());
+        }
+
+        /// <summary>
+        /// Writes every captured activity to the test output: first grouped by source with ID, parent,
+        /// duration, status and tags, then as a parent/child hierarchy.
+        /// </summary>
+        private void PrintActivityDiagnostics()
+        {
+            // NOTE(review): whitespace inside the string literals below appears to have been collapsed in this
+            // copy of the patch; the literals are reproduced exactly as visible - confirm against the original change.
+            var activities = Started.ToList();
+            if (activities.Count == 0)
+            {
+                _output.WriteLine("No activities captured.");
+                return;
+            }
+
+            var sb = new StringBuilder();
+            sb.AppendLine();
+            sb.AppendLine("╔══════════════════════════════════════════════════════════════════════════════╗");
+            sb.AppendLine("║ CAPTURED ACTIVITIES DIAGNOSTIC ║");
+            sb.AppendLine("╠══════════════════════════════════════════════════════════════════════════════╣");
+            sb.AppendLine($"║ Total Activities: {activities.Count,-59}║");
+            sb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════╝");
+            sb.AppendLine();
+
+            // Group by source
+            foreach (var sourceGroup in activities.GroupBy(a => a.Source.Name).OrderBy(g => g.Key))
+            {
+                sb.AppendLine($"┌─ Source: {sourceGroup.Key}");
+                sb.AppendLine("│");
+
+                var sourceActivities = sourceGroup.OrderBy(a => a.StartTimeUtc).ToList();
+                var lastIndex = sourceActivities.Count - 1;
+                for (var i = 0; i <= lastIndex; i++)
+                {
+                    var activity = sourceActivities[i];
+                    var isLast = i == lastIndex;
+                    var prefix = isLast ? "└──" : "├──";
+                    var continuePrefix = isLast ? " " : "│ ";
+                    var parentText = activity.ParentId is not null
+                        ? $"│ {continuePrefix} Parent: {activity.ParentId}"
+                        : $"│ {continuePrefix} Parent: (root)";
+
+                    sb.AppendLine($"│ {prefix} [{activity.OperationName}]");
+                    sb.AppendLine($"│ {continuePrefix} ID: {activity.Id ?? "(null)"}");
+                    sb.AppendLine(parentText);
+                    sb.AppendLine($"│ {continuePrefix} Duration: {activity.Duration.TotalMilliseconds:F2}ms");
+                    sb.AppendLine($"│ {continuePrefix} Status: {activity.Status}");
+
+                    var tags = activity.Tags.ToList();
+                    if (tags.Count > 0)
+                    {
+                        sb.AppendLine($"│ {continuePrefix} Tags:");
+                        foreach (var tag in tags)
+                        {
+                            sb.AppendLine($"│ {continuePrefix} • {tag.Key}: {tag.Value}");
+                        }
+                    }
+
+                    sb.AppendLine("│");
+                }
+
+                sb.AppendLine();
+            }
+
+            // Print hierarchy view
+            sb.AppendLine("═══════════════════════════════════════════════════════════════════════════════");
+            sb.AppendLine(" ACTIVITY HIERARCHY ");
+            sb.AppendLine("═══════════════════════════════════════════════════════════════════════════════");
+            sb.AppendLine();
+
+            // A root is an activity without a parent, or whose parent was not captured.
+            var activityById = activities.Where(a => a.Id is not null).ToDictionary(a => a.Id!);
+            var roots = activities
+                .Where(a => a.ParentId is null || !activityById.ContainsKey(a.ParentId))
+                .OrderBy(a => a.StartTimeUtc);
+
+            foreach (var root in roots)
+            {
+                PrintActivityTree(sb, root, activityById, activities, "", true);
+            }
+
+            _output.WriteLine(sb.ToString());
+        }
+
+        /// <summary>
+        /// Appends one activity and, recursively, the captured activities whose ParentId equals its Id.
+        /// </summary>
+        private static void PrintActivityTree(
+            StringBuilder sb,
+            Activity activity,
+            Dictionary<string, Activity> activityById,
+            List<Activity> allActivities,
+            string indent,
+            bool isLast)
+        {
+            var elapsedMs = activity.Duration.TotalMilliseconds;
+            var durationStr = elapsedMs > 0 ? $" ({elapsedMs:F2}ms)" : "";
+            var marker = isLast ? "└── " : "├── ";
+
+            sb.AppendLine($"{indent}{marker}[{activity.Source.Name}] {activity.OperationName}{durationStr}");
+
+            var children = allActivities
+                .Where(a => a.ParentId == activity.Id)
+                .OrderBy(a => a.StartTimeUtc)
+                .ToList();
+
+            // NOTE(review): whitespace inside these literals may have been collapsed in this copy of the patch.
+            var childIndent = indent + (isLast ? " " : "│ ");
+
+            var lastChild = children.Count - 1;
+            for (var i = 0; i <= lastChild; i++)
+            {
+                PrintActivityTree(sb, children[i], activityById, allActivities, childIndent, i == lastChild);
+            }
+        }
+    }
+
+    #region Test Grains for Deactivation Tracing
+
+    /// <summary>
+    /// Test grain interface for basic deactivation tracing tests.
+    /// </summary>
+    public interface IDeactivationTracingTestGrain : IGrainWithIntegerKey
+    {
+        Task<ActivityData> GetActivityId();
+        Task TriggerDeactivation();
+    }
+
+    /// <summary>
+    /// Test grain implementation for basic deactivation tracing tests.
+    /// Implements a simple OnDeactivateAsync to verify the span is created.
+    /// </summary>
+    public class DeactivationTracingTestGrain : Grain, IDeactivationTracingTestGrain
+    {
+        /// <summary>Returns the ID, trace state and baggage of Activity.Current, or default when there is none.</summary>
+        public Task<ActivityData> GetActivityId()
+        {
+            if (Activity.Current is not { } current)
+            {
+                return Task.FromResult(default(ActivityData));
+            }
+
+            var data = new ActivityData
+            {
+                Id = current.Id,
+                TraceState = current.TraceStateString,
+                Baggage = current.Baggage.ToList(),
+            };
+            return Task.FromResult(data);
+        }
+
+        /// <summary>Requests deactivation of this activation via DeactivateOnIdle.</summary>
+        public Task TriggerDeactivation()
+        {
+            this.DeactivateOnIdle();
+            return Task.CompletedTask;
+        }
+
+        // Simple deactivation logic to ensure OnDeactivateAsync is called
+        public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
+            => Task.CompletedTask;
+    }
+
+    /// <summary>
+    /// Test grain interface for deactivation tracing with work in OnDeactivateAsync.
+    /// </summary>
+    public interface IDeactivationWithWorkTracingTestGrain : IGrainWithIntegerKey
+    {
+        Task<ActivityData> GetActivityId();
+        Task TriggerDeactivation();
+        Task<bool> WasDeactivated();
+    }
+
+    /// <summary>
+    /// Test grain state for tracking deactivation.
+    /// </summary>
+    [GenerateSerializer]
+    public class DeactivationWorkState
+    {
+        [Id(0)]
+        public bool WasDeactivated { get; set; }
+
+        [Id(1)]
+        public string DeactivationReason { get; set; }
+    }
+
+    ///
+    /// Test grain implementation that performs work during OnDeactivateAsync.
+    /// Uses persistent state to track that deactivation occurred.
+    ///
+    public class DeactivationWithWorkTracingTestGrain : Grain, IDeactivationWithWorkTracingTestGrain
+    {
+        private readonly IPersistentState<DeactivationWorkState> _state;
+
+        public DeactivationWithWorkTracingTestGrain(
+            [PersistentState("deactivationState")] IPersistentState<DeactivationWorkState> state)
+        {
+            _state = state;
+        }
+
+        /// <summary>Returns the ID, trace state and baggage of Activity.Current, or default when there is none.</summary>
+        public Task<ActivityData> GetActivityId()
+        {
+            if (Activity.Current is not { } current)
+            {
+                return Task.FromResult(default(ActivityData));
+            }
+
+            var data = new ActivityData
+            {
+                Id = current.Id,
+                TraceState = current.TraceStateString,
+                Baggage = current.Baggage.ToList(),
+            };
+            return Task.FromResult(data);
+        }
+
+        /// <summary>Requests deactivation of this activation via DeactivateOnIdle.</summary>
+        public Task TriggerDeactivation()
+        {
+            this.DeactivateOnIdle();
+            return Task.CompletedTask;
+        }
+
+        /// <summary>Returns the WasDeactivated flag held in the persistent state.</summary>
+        public Task<bool> WasDeactivated()
+        {
+            return Task.FromResult(_state.State.WasDeactivated);
+        }
+
+        /// <summary>Records the deactivation and its reason in the persistent state, then writes the state.</summary>
+        public override async Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
+        {
+            // Perform work during deactivation - write state
+            var current = _state.State;
+            current.WasDeactivated = true;
+            current.DeactivationReason = reason.ToString();
+            await _state.WriteStateAsync();
+        }
+    }
+
+    /// <summary>
+    /// Test grain interface for deactivation tracing with exception in OnDeactivateAsync.
+    /// </summary>
+    public interface IDeactivationWithExceptionTracingTestGrain : IGrainWithIntegerKey
+    {
+        Task<ActivityData> GetActivityId();
+        Task TriggerDeactivation();
+    }
+
+    ///
+    /// Test grain implementation that throws an exception during OnDeactivateAsync.
+    /// Used to verify that the OnDeactivate span captures errors correctly.
+    ///
+    public class DeactivationWithExceptionTracingTestGrain : Grain, IDeactivationWithExceptionTracingTestGrain
+    {
+        /// <summary>Returns the ID, trace state and baggage of Activity.Current, or default when there is none.</summary>
+        public Task<ActivityData> GetActivityId()
+        {
+            var ambient = Activity.Current;
+            return ambient is null
+                ? Task.FromResult(default(ActivityData))
+                : Task.FromResult(new ActivityData
+                {
+                    Id = ambient.Id,
+                    TraceState = ambient.TraceStateString,
+                    Baggage = ambient.Baggage.ToList(),
+                });
+        }
+
+        /// <summary>Requests deactivation of this activation via DeactivateOnIdle.</summary>
+        public Task TriggerDeactivation()
+        {
+            this.DeactivateOnIdle();
+            return Task.CompletedTask;
+        }
+
+        /// <summary>Always throws (synchronously) to simulate a failing deactivation.</summary>
+        public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
+            => throw new InvalidOperationException("Simulated error during deactivation");
+    }
+
+    /// <summary>
+    /// Test grain interface for deactivation tracing with migration participant.
+    /// </summary>
+    public interface IDeactivationMigrationTracingTestGrain : IGrainWithIntegerKey
+    {
+        ValueTask<GrainAddress> GetGrainAddress();
+        ValueTask SetState(int state);
+        ValueTask<int> GetState();
+        ValueTask TriggerDeactivation();
+    }
+
+    ///
+    /// Test grain implementation that implements IGrainMigrationParticipant for deactivation tracing.
+    /// Used to verify OnDeactivate span is created before dehydration during migration.
+    ///
+    [RandomPlacement]
+    public class DeactivationMigrationTracingTestGrain : Grain, IDeactivationMigrationTracingTestGrain, IGrainMigrationParticipant
+    {
+        private int _state;
+        private bool _onDeactivateCalled;
+
+        /// <summary>Returns the integer held by this activation.</summary>
+        public ValueTask<int> GetState()
+        {
+            return new ValueTask<int>(_state);
+        }
+
+        /// <summary>Stores the given integer in this activation.</summary>
+        public ValueTask SetState(int state)
+        {
+            _state = state;
+            return ValueTask.CompletedTask;
+        }
+
+        /// <summary>Requests deactivation of this activation via DeactivateOnIdle.</summary>
+        public ValueTask TriggerDeactivation()
+        {
+            this.DeactivateOnIdle();
+            return ValueTask.CompletedTask;
+        }
+
+        /// <summary>Remembers that deactivation ran so the flag can be carried in the dehydration context.</summary>
+        public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
+        {
+            _onDeactivateCalled = true;
+            return Task.CompletedTask;
+        }
+
+        /// <summary>Adds the state and the deactivation flag to the dehydration context.</summary>
+        public void OnDehydrate(IDehydrationContext migrationContext)
+        {
+            migrationContext.TryAddValue("state", _state);
+            migrationContext.TryAddValue("onDeactivateCalled", _onDeactivateCalled);
+        }
+
+        /// <summary>Reads the state and the deactivation flag back from the rehydration context.</summary>
+        public void OnRehydrate(IRehydrationContext migrationContext)
+        {
+            migrationContext.TryGetValue("state", out _state);
+            migrationContext.TryGetValue("onDeactivateCalled", out _onDeactivateCalled);
+        }
+
+        /// <summary>Returns the address of the current activation.</summary>
+        public ValueTask<GrainAddress> GetGrainAddress()
+        {
+            return new ValueTask<GrainAddress>(GrainContext.Address);
+        }
+    }
+
+    /// <summary>
+    /// Test grain interface for InconsistentStateException deactivation tracing tests.
+    /// </summary>
+    public interface IInconsistentStateDeactivationGrain : IGrainWithIntegerKey
+    {
+        Task<ActivityData> GetActivityId();
+        Task ThrowInconsistentStateException();
+    }
+
+    ///
+    /// Test grain implementation that throws InconsistentStateException.
+    /// Used to verify OnDeactivate span is created with ApplicationError reason.
///
public class InconsistentStateDeactivationGrain : Grain, IInconsistentStateDeactivationGrain
{
    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }

    /// <summary>
    /// Always throws an InconsistentStateException flagged as originating from this activation.
    /// </summary>
    public Task ThrowInconsistentStateException()
    {
        var failure = new InconsistentStateException("Simulated inconsistent state for testing deactivation tracing")
        {
            IsSourceActivation = true
        };

        throw failure;
    }

    /// <summary>
    /// No-op deactivation callback.
    /// </summary>
    public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
        => Task.CompletedTask; // Simple deactivation logic
}

/// <summary>
/// Test grain interface for activation failure deactivation tracing tests.
/// </summary>
public interface IActivationFailureDeactivationGrain : IGrainWithIntegerKey
{
    Task GetActivityId();
}

///
/// Test grain implementation that fails during activation.
/// Used to verify OnDeactivate span is NOT created when PreviousState != Valid.
///
public class ActivationFailureDeactivationGrain : Grain, IActivationFailureDeactivationGrain
{
    /// <summary>
    /// Throws from the constructor so that activation fails.
    /// </summary>
    public ActivationFailureDeactivationGrain()
        => throw new InvalidOperationException("Simulated activation failure for testing deactivation tracing");

    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }

    /// <summary>
    /// No-op deactivation callback; not expected to run because activation fails.
    /// </summary>
    public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
        => Task.CompletedTask; // This should never be called since activation fails
}

/// <summary>
/// Test grain interface for GrainContext.Deactivate deactivation tracing tests.
/// </summary>
public interface IGrainContextDeactivationGrain : IGrainWithIntegerKey
{
    Task GetActivityId();

    Task DeactivateWithCustomReason(string reason);
}

///
/// Test grain implementation that uses GrainContext.Deactivate with custom reason.
/// Used to verify OnDeactivate span is created with the custom reason.
///
public class GrainContextDeactivationGrain : Grain, IGrainContextDeactivationGrain
{
    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }

    /// <summary>
    /// Deactivates through the grain context using an ApplicationRequested reason
    /// that carries the supplied description.
    /// </summary>
    public Task DeactivateWithCustomReason(string reason)
    {
        var deactivationReason = new DeactivationReason(DeactivationReasonCode.ApplicationRequested, reason);
        GrainContext.Deactivate(deactivationReason);
        return Task.CompletedTask;
    }

    /// <summary>
    /// No-op deactivation callback.
    /// </summary>
    public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
        => Task.CompletedTask; // Simple deactivation logic
}

#endregion

#region Test Placement Filter for Tracing

/// <summary>
/// Test placement filter attribute for tracing tests.
/// </summary>
public class TracingTestPlacementFilterAttribute() : PlacementFilterAttribute(new TracingTestPlacementFilterStrategy());

/// <summary>
/// Test placement filter strategy for tracing tests.
/// </summary>
public class TracingTestPlacementFilterStrategy() : PlacementFilterStrategy(order: 1)
{
}

/// <summary>
/// Test placement filter director that simply passes through all silos.
/// </summary>
public class TracingTestPlacementFilterDirector : IPlacementFilterDirector
{
    /// <summary>Returns the candidate silos unchanged.</summary>
    public IEnumerable Filter(PlacementFilterStrategy filterStrategy, PlacementTarget target, IEnumerable silos)
        => silos;
}

/// <summary>
/// Second test placement filter attribute for tracing tests with multiple filters.
/// </summary>
public class SecondTracingTestPlacementFilterAttribute() : PlacementFilterAttribute(new SecondTracingTestPlacementFilterStrategy());

///
/// Second test placement filter strategy for tracing tests with multiple filters.
///
public class SecondTracingTestPlacementFilterStrategy() : PlacementFilterStrategy(order: 2)
{
}

/// <summary>
/// Second test placement filter director that simply passes through all silos.
/// </summary>
public class SecondTracingTestPlacementFilterDirector : IPlacementFilterDirector
{
    /// <summary>Returns the candidate silos unchanged.</summary>
    public IEnumerable Filter(PlacementFilterStrategy filterStrategy, PlacementTarget target, IEnumerable silos)
        => silos;
}

/// <summary>
/// Test grain interface with a placement filter for tracing tests.
/// </summary>
public interface IFilteredActivityGrain : IGrainWithIntegerKey
{
    Task GetActivityId();
}

/// <summary>
/// Test grain implementation with a placement filter for tracing tests.
/// </summary>
[TracingTestPlacementFilter]
public class FilteredActivityGrain : Grain, IFilteredActivityGrain
{
    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData()
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }
}

/// <summary>
/// Test grain interface with multiple placement filters for tracing tests.
/// </summary>
public interface IMultiFilteredActivityGrain : IGrainWithIntegerKey
{
    Task GetActivityId();
}

///
/// Test grain implementation with multiple placement filters for tracing tests.
///
[TracingTestPlacementFilter]
[SecondTracingTestPlacementFilter]
public class MultiFilteredActivityGrain : Grain, IMultiFilteredActivityGrain
{
    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData()
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }
}

#endregion

#region Test Grain with Persistent State for tracing

/// <summary>
/// Test grain interface with persistent state for tracing tests.
/// </summary>
public interface IPersistentStateActivityGrain : IGrainWithIntegerKey
{
    Task GetActivityId();

    Task GetStateValue();
}

/// <summary>
/// Test grain state for persistent state tracing tests.
/// </summary>
[GenerateSerializer]
public class PersistentStateActivityGrainState
{
    [Id(0)]
    public int Value { get; set; }
}

/// <summary>
/// Test grain implementation with persistent state for tracing tests.
/// </summary>
[TracingTestPlacementFilter]
public class PersistentStateActivityGrain : Grain, IPersistentStateActivityGrain
{
    // Persistent state registered under the name "state".
    private readonly IPersistentState _state;

    public PersistentStateActivityGrain([PersistentState("state")] IPersistentState state)
    {
        _state = state;
    }

    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData()
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }

    /// <summary>Returns the Value held by the persistent state.</summary>
    public Task GetStateValue() => Task.FromResult(_state.State.Value);
}

#endregion

#region Test Grain for Migration Tracing

///
/// Test grain interface for migration tracing tests.
///
public interface IMigrationTracingTestGrain : IGrainWithIntegerKey
{
    ValueTask GetGrainAddress();

    ValueTask SetState(int state);

    ValueTask GetState();
}

/// <summary>
/// Test grain state for migration tracing tests.
/// </summary>
[GenerateSerializer]
public class MigrationTracingTestGrainState
{
    [Id(0)]
    public int Value { get; set; }
}

/// <summary>
/// Test grain implementation for migration tracing tests.
/// Implements IGrainMigrationParticipant to participate in migration.
/// Uses RandomPlacement to allow migration to different silos.
/// </summary>
[RandomPlacement]
public class MigrationTracingTestGrain : Grain, IMigrationTracingTestGrain, IGrainMigrationParticipant
{
    // In-memory value carried across migration through OnDehydrate/OnRehydrate.
    private int _state;

    /// <summary>Returns the in-memory state value.</summary>
    public ValueTask GetState() => new(_state);

    /// <summary>Overwrites the in-memory state value.</summary>
    public ValueTask SetState(int state)
    {
        _state = state;
        return default;
    }

    /// <summary>Writes the state value into the dehydration context under the key "state".</summary>
    public void OnDehydrate(IDehydrationContext migrationContext)
    {
        migrationContext.TryAddValue("state", _state);
    }

    /// <summary>Reads the state value back from the rehydration context.</summary>
    public void OnRehydrate(IRehydrationContext migrationContext)
    {
        migrationContext.TryGetValue("state", out _state);
    }

    /// <summary>Returns the address of this grain's context.</summary>
    public ValueTask GetGrainAddress() => new(GrainContext.Address);
}

/// <summary>
/// Test grain interface for migration tracing tests with a placement filter.
/// </summary>
public interface IMigrationFilterTracingTestGrain : IGrainWithIntegerKey
{
    ValueTask GetGrainAddress();

    ValueTask SetState(int state);

    ValueTask GetState();
}

///
/// Test grain implementation for migration tracing tests with a placement filter.
/// Combines IGrainMigrationParticipant and a placement filter to verify that
/// FilterPlacementCandidates spans are properly parented under PlaceGrain during migration.
///
[RandomPlacement]
[TracingTestPlacementFilter]
public class MigrationFilterTracingTestGrain : Grain, IMigrationFilterTracingTestGrain, IGrainMigrationParticipant
{
    // In-memory value carried across migration through OnDehydrate/OnRehydrate.
    private int _state;

    /// <summary>Returns the in-memory state value.</summary>
    public ValueTask GetState() => new(_state);

    /// <summary>Overwrites the in-memory state value.</summary>
    public ValueTask SetState(int state)
    {
        _state = state;
        return default;
    }

    /// <summary>Writes the state value into the dehydration context under the key "state".</summary>
    public void OnDehydrate(IDehydrationContext migrationContext)
    {
        migrationContext.TryAddValue("state", _state);
    }

    /// <summary>Reads the state value back from the rehydration context.</summary>
    public void OnRehydrate(IRehydrationContext migrationContext)
    {
        migrationContext.TryGetValue("state", out _state);
    }

    /// <summary>Returns the address of this grain's context.</summary>
    public ValueTask GetGrainAddress() => new(GrainContext.Address);
}

/// <summary>
/// Test grain interface for migration tracing tests without IGrainMigrationParticipant.
/// </summary>
public interface ISimpleMigrationTracingTestGrain : IGrainWithIntegerKey
{
    ValueTask GetGrainAddress();

    ValueTask SetState(int state);

    ValueTask GetState();
}

/// <summary>
/// Test grain implementation for migration tracing tests that does NOT implement IGrainMigrationParticipant.
/// Uses RandomPlacement to allow migration to different silos.
/// This grain will lose its state during migration since it doesn't participate in dehydration/rehydration.
/// </summary>
[RandomPlacement]
public class SimpleMigrationTracingTestGrain : Grain, ISimpleMigrationTracingTestGrain
{
    // Plain field with no dehydrate/rehydrate hooks in this class.
    private int _state;

    /// <summary>Returns the in-memory state value.</summary>
    public ValueTask GetState() => new(_state);

    /// <summary>Overwrites the in-memory state value.</summary>
    public ValueTask SetState(int state)
    {
        _state = state;
        return default;
    }

    /// <summary>Returns the address of this grain's context.</summary>
    public ValueTask GetGrainAddress() => new(GrainContext.Address);
}

/// <summary>
/// Test grain interface with persistent state for migration tracing tests.
/// </summary>
public interface IMigrationPersistentStateTracingTestGrain : IGrainWithIntegerKey
{
    ValueTask SetState(int a, int b);

    ValueTask<(int A, int B)> GetState();

    ValueTask GetGrainAddress();
}

///
/// Test grain implementation with IPersistentState for migration tracing tests.
/// Uses RandomPlacement to allow migration to different silos.
///
[RandomPlacement]
public class MigrationPersistentStateTracingTestGrain : Grain, IMigrationPersistentStateTracingTestGrain
{
    // Two independent persistent states, registered under the names "a" and "b".
    private readonly IPersistentState _stateA;
    private readonly IPersistentState _stateB;

    public MigrationPersistentStateTracingTestGrain(
        [PersistentState("a")] IPersistentState stateA,
        [PersistentState("b")] IPersistentState stateB)
    {
        _stateA = stateA;
        _stateB = stateB;
    }

    /// <summary>Returns the current values of both states as a tuple.</summary>
    public ValueTask<(int A, int B)> GetState()
    {
        var pair = (_stateA.State.Value, _stateB.State.Value);
        return new(pair);
    }

    /// <summary>Assigns both state values in memory; this method does not write them to storage.</summary>
    public ValueTask SetState(int a, int b)
    {
        _stateA.State.Value = a;
        _stateB.State.Value = b;
        return default;
    }

    /// <summary>Returns the address of this grain's context.</summary>
    public ValueTask GetGrainAddress() => new(GrainContext.Address);
}

#endregion

#region Test Grain for IAsyncEnumerable with Deactivation

/// <summary>
/// Test grain interface for IAsyncEnumerable deactivation tracing tests.
/// </summary>
public interface IAsyncEnumerableDeactivationGrain : IGrainWithIntegerKey
{
    IAsyncEnumerable GetValuesAndDeactivate(int count);

    Task GetActivityId();
}

/// <summary>
/// Grain call filter that triggers deactivation after DisposeAsync is called on an async enumerable.
/// This ensures deactivation happens after the enumeration is fully complete.
/// </summary>
public class DeactivateAfterDisposeAsyncFilter : IIncomingGrainCallFilter
{
    /// <summary>
    /// Runs the call first, then requests deactivation when the completed call was
    /// DisposeAsync declared on Orleans.Runtime.IAsyncEnumerableGrainExtension.
    /// </summary>
    public async Task Invoke(IIncomingGrainCallContext context)
    {
        await context.Invoke();

        // Anything other than the async-enumerable DisposeAsync call is left alone.
        var isDisposeCall = context.InterfaceMethod?.Name == "DisposeAsync"
            && context.InterfaceMethod.DeclaringType?.FullName == "Orleans.Runtime.IAsyncEnumerableGrainExtension";
        if (!isDisposeCall)
        {
            return;
        }

        // Only Grain-derived targets are deactivated.
        if (context.Grain is Grain target)
        {
            target.DeactivateOnIdle();
        }
    }
}

///
/// Test grain implementation that yields values via IAsyncEnumerable and then deactivates after DisposeAsync.
/// Uses a grain call filter to trigger deactivation after the async enumerable is disposed.
///
public class AsyncEnumerableDeactivationGrain : Grain, IAsyncEnumerableDeactivationGrain
{
    /// <summary>
    /// Yields the integers 0 through count - 1, pausing briefly before each one.
    /// </summary>
    public async IAsyncEnumerable GetValuesAndDeactivate(int count)
    {
        var next = 0;
        while (next < count)
        {
            await Task.Delay(10); // Small delay to simulate work
            yield return next;
            next++;
        }
    }

    /// <summary>
    /// Captures the id, trace state and baggage of <see cref="Activity.Current"/>,
    /// or yields the default value when no activity is ambient.
    /// </summary>
    public Task GetActivityId()
    {
        var current = Activity.Current;
        var snapshot = current is null
            ? default(ActivityData)
            : new ActivityData
            {
                Id = current.Id,
                TraceState = current.TraceStateString,
                Baggage = current.Baggage.ToList(),
            };

        return Task.FromResult(snapshot);
    }

    /// <summary>
    /// No-op deactivation callback.
    /// </summary>
    public override Task OnDeactivateAsync(DeactivationReason reason, CancellationToken cancellationToken)
        => Task.CompletedTask; // Simple deactivation logic to ensure OnDeactivateAsync is called
}

#endregion

#region Trace Context Propagation Tests

/// <summary>
/// Test grain interface for verifying trace context propagation from client to grain.
/// Returns detailed trace information to verify the server received the correct trace context.
/// </summary>
public interface ITraceContextPropagationGrain : IGrainWithIntegerKey
{
    /// <summary>
    /// Returns detailed trace information from the server-side Activity.Current.
    /// This allows the test to verify that trace context was properly propagated.
    /// </summary>
    Task GetTraceContextInfo();

    /// <summary>
    /// Makes a call to another grain and returns both the local and nested trace context.
    /// Used to verify trace context propagation across grain-to-grain calls.
    /// </summary>
    Task<(TraceContextInfo Local, TraceContextInfo Nested)> GetNestedTraceContextInfo();
}

///
/// Detailed trace context information returned from grain calls.
///
[GenerateSerializer]
public class TraceContextInfo
{
    [Id(0)] public string ActivityId { get; set; }

    [Id(1)] public string TraceId { get; set; }

    [Id(2)] public string SpanId { get; set; }

    [Id(3)] public string ParentSpanId { get; set; }

    [Id(4)] public string ParentId { get; set; }

    [Id(5)] public string OperationName { get; set; }

    [Id(6)] public string TraceParentFromRequestContext { get; set; }

    [Id(7)] public bool HasActivity { get; set; }

    [Id(8)] public ActivityKind Kind { get; set; }

    [Id(9)] public bool IsRemote { get; set; }
}

/// <summary>
/// Test grain implementation for verifying trace context propagation.
/// </summary>
public class TraceContextPropagationGrain : Grain, ITraceContextPropagationGrain
{
    /// <summary>
    /// Describes <see cref="Activity.Current"/> together with the "traceparent" entry of RequestContext.
    /// With no ambient activity the string fields are null, Kind is Internal and IsRemote is false.
    /// </summary>
    public Task GetTraceContextInfo()
    {
        var current = Activity.Current;
        var traceParentHeader = RequestContext.Get("traceparent") as string;

        var info = new TraceContextInfo
        {
            HasActivity = current is not null,
            ActivityId = current?.Id,
            TraceId = current?.TraceId.ToString(),
            SpanId = current?.SpanId.ToString(),
            ParentSpanId = current?.ParentSpanId.ToString(),
            ParentId = current?.ParentId,
            OperationName = current?.OperationName,
            Kind = current?.Kind ?? ActivityKind.Internal,
            IsRemote = current?.HasRemoteParent ?? false,
            TraceParentFromRequestContext = traceParentHeader
        };

        return Task.FromResult(info);
    }

    /// <summary>
    /// Collects this grain's trace info, then the trace info reported by the grain whose key is this key + 1.
    /// </summary>
    public async Task<(TraceContextInfo Local, TraceContextInfo Nested)> GetNestedTraceContextInfo()
    {
        var local = await GetTraceContextInfo();

        // Make a nested call to another grain
        var neighbourKey = this.GetPrimaryKeyLong() + 1;
        var nested = await GrainFactory.GetGrain(neighbourKey).GetTraceContextInfo();

        return (local, nested);
    }
}

#endregion

///
/// Tests specifically for verifying trace context propagation between client and grain server.
/// These tests expose issues where the server-side span starts a new trace instead of continuing the client's trace.
///
[Collection("ActivationTracing")]
public class GrainCallTraceContextPropagationTests : OrleansTestingBase, IClassFixture
{
    // Every activity started on a listened-to source is added here by the static listener.
    private static readonly ConcurrentBag Started = new();

    private readonly ActivationTracingTests.Fixture _fixture;
    private readonly ITestOutputHelper _output;

    /// <summary>
    /// Registers a listener on the application, lifecycle and storage activity sources
    /// that samples everything and records each started activity.
    /// </summary>
    static GrainCallTraceContextPropagationTests()
    {
        ActivitySource.AddActivityListener(new ActivityListener
        {
            ShouldListenTo = source => source.Name is ActivitySources.ApplicationGrainActivitySourceName
                or ActivitySources.LifecycleActivitySourceName
                or ActivitySources.StorageActivitySourceName,
            Sample = (ref _) => ActivitySamplingResult.AllData,
            SampleUsingParentId = (ref _) => ActivitySamplingResult.AllData,
            ActivityStarted = started => Started.Add(started),
        });
    }

    public GrainCallTraceContextPropagationTests(ActivationTracingTests.Fixture fixture, ITestOutputHelper output)
    {
        _output = output;
        _fixture = fixture;
    }

    ///
    /// CRITICAL TEST: Verifies that the server-side grain call activity has the same TraceId as the client.
    /// This test fails if trace context propagation is broken - the server will start a new trace instead
    /// of continuing the client's trace.
///
[Fact]
[TestCategory("BVT")]
public async Task ServerSideGrainCallSharesSameTraceIdAsClient()
{
    Started.Clear();

    // Start a parent activity on the client side.
    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-parent-activity");

    Assert.NotNull(clientActivity);
    var clientTraceId = clientActivity.TraceId.ToString();
    var clientSpanId = clientActivity.SpanId.ToString();

    _output.WriteLine($"Client TraceId: {clientTraceId}");
    _output.WriteLine($"Client SpanId: {clientSpanId}");

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());

        // This call should propagate the trace context to the server
        var serverTraceInfo = await grain.GetTraceContextInfo();

        _output.WriteLine($"Server HasActivity: {serverTraceInfo.HasActivity}");
        _output.WriteLine($"Server TraceId: {serverTraceInfo.TraceId}");
        _output.WriteLine($"Server SpanId: {serverTraceInfo.SpanId}");
        _output.WriteLine($"Server ParentSpanId: {serverTraceInfo.ParentSpanId}");
        _output.WriteLine($"Server ParentId: {serverTraceInfo.ParentId}");
        _output.WriteLine($"Server OperationName: {serverTraceInfo.OperationName}");
        _output.WriteLine($"Server Kind: {serverTraceInfo.Kind}");
        _output.WriteLine($"Server IsRemote: {serverTraceInfo.IsRemote}");
        _output.WriteLine($"Server TraceParentFromRequestContext: {serverTraceInfo.TraceParentFromRequestContext}");

        // CRITICAL ASSERTION: Server must have an activity
        Assert.True(serverTraceInfo.HasActivity, "Server-side grain call should have an Activity.Current");

        // CRITICAL ASSERTION: Server TraceId must match client TraceId
        // If this fails, trace context propagation is broken!
        Assert.Equal(clientTraceId, serverTraceInfo.TraceId);
    }
    finally
    {
        clientActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

///
/// Verifies that the server-side activity is a Server kind and has a remote parent.
/// This confirms proper W3C trace context handling.
///
[Fact]
[TestCategory("BVT")]
public async Task ServerSideActivityHasCorrectKindAndRemoteParent()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-activity-kind-test");

    Assert.NotNull(clientActivity);

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var serverTraceInfo = await grain.GetTraceContextInfo();

        _output.WriteLine($"Server Kind: {serverTraceInfo.Kind}");
        _output.WriteLine($"Server IsRemote: {serverTraceInfo.IsRemote}");

        Assert.True(serverTraceInfo.HasActivity, "Server-side grain call should have an Activity.Current");

        // Server-side activity should be of kind Server
        Assert.Equal(ActivityKind.Server, serverTraceInfo.Kind);

        // Server-side activity should have a remote parent (propagated from client)
        Assert.True(serverTraceInfo.IsRemote, "Server-side activity should have HasRemoteParent=true");
    }
    finally
    {
        clientActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

///
/// Verifies trace context propagation across nested grain-to-grain calls.
/// All calls in the chain should share the same TraceId.
///
[Fact]
[TestCategory("BVT")]
public async Task NestedGrainCallsShareSameTraceId()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-nested-calls-test");

    Assert.NotNull(clientActivity);
    var clientTraceId = clientActivity.TraceId.ToString();

    _output.WriteLine($"Client TraceId: {clientTraceId}");

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var (localInfo, nestedInfo) = await grain.GetNestedTraceContextInfo();

        _output.WriteLine($"First Grain TraceId: {localInfo.TraceId}");
        _output.WriteLine($"Nested Grain TraceId: {nestedInfo.TraceId}");

        // Both grains should have activities
        Assert.True(localInfo.HasActivity, "First grain should have an Activity.Current");
        Assert.True(nestedInfo.HasActivity, "Nested grain should have an Activity.Current");

        // CRITICAL: All calls should share the same TraceId
        Assert.Equal(clientTraceId, localInfo.TraceId);
        Assert.Equal(clientTraceId, nestedInfo.TraceId);
    }
    finally
    {
        clientActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

///
/// Verifies that traceparent header is properly set in RequestContext when making grain calls.
/// This tests the outgoing filter's injection of trace context.
///
[Fact]
[TestCategory("BVT")]
public async Task TraceParentIsSetInRequestContext()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-traceparent-test");

    Assert.NotNull(clientActivity);
    var clientTraceId = clientActivity.TraceId.ToString();

    _output.WriteLine($"Client TraceId: {clientTraceId}");

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var serverTraceInfo = await grain.GetTraceContextInfo();

        _output.WriteLine($"Server TraceParentFromRequestContext: {serverTraceInfo.TraceParentFromRequestContext}");

        // traceparent header should be present in RequestContext
        Assert.NotNull(serverTraceInfo.TraceParentFromRequestContext);
        Assert.NotEmpty(serverTraceInfo.TraceParentFromRequestContext);

        // traceparent should contain the client's TraceId
        Assert.Contains(clientTraceId, serverTraceInfo.TraceParentFromRequestContext);
    }
    finally
    {
        clientActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

///
/// Verifies that the client-side outgoing span and server-side incoming span are properly linked.
/// The server span's parent should be the client's outgoing span.
///
[Fact]
[TestCategory("BVT")]
public async Task ClientAndServerSpansAreProperlyLinked()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientParentActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-linking-test");

    Assert.NotNull(clientParentActivity);
    var clientTraceId = clientParentActivity.TraceId.ToString();

    _output.WriteLine($"Client Parent TraceId: {clientTraceId}");
    _output.WriteLine($"Client Parent SpanId: {clientParentActivity.SpanId}");

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        _ = await grain.GetTraceContextInfo();

        // Find the client-side outgoing span (should be a child of our test activity)
        var clientOutgoingSpan = Started
            .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
                                 && a.Kind == ActivityKind.Client
                                 && a.OperationName.Contains("GetTraceContextInfo"));

        // Find the server-side incoming span
        var serverIncomingSpan = Started
            .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
                                 && a.Kind == ActivityKind.Server
                                 && a.OperationName.Contains("GetTraceContextInfo"));

        _output.WriteLine($"Client Outgoing Span: {clientOutgoingSpan?.Id ?? "(not found)"}");
        _output.WriteLine($"Server Incoming Span: {serverIncomingSpan?.Id ?? "(not found)"}");

        Assert.NotNull(clientOutgoingSpan);
        Assert.NotNull(serverIncomingSpan);

        // Both should share the same TraceId
        Assert.Equal(clientTraceId, clientOutgoingSpan.TraceId.ToString());
        Assert.Equal(clientTraceId, serverIncomingSpan.TraceId.ToString());

        // Client outgoing span should be parented to our test activity
        Assert.Equal(clientParentActivity.SpanId.ToString(), clientOutgoingSpan.ParentSpanId.ToString());

        // Server span's parent should be the client outgoing span
        Assert.Equal(clientOutgoingSpan.SpanId.ToString(), serverIncomingSpan.ParentSpanId.ToString());
    }
    finally
    {
        clientParentActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

/// <summary>
/// Verifies that trace context is properly propagated even when the client has no active activity.
/// The server should still create its own trace in this case.
/// </summary>
[Fact]
[TestCategory("BVT")]
public async Task ServerCreatesOwnTraceWhenClientHasNoActivity()
{
    Started.Clear();

    // Ensure no activity is current on the client; the previous one is restored in finally.
    var previousActivity = Activity.Current;
    Activity.Current = null;

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var serverTraceInfo = await grain.GetTraceContextInfo();

        _output.WriteLine($"Server HasActivity: {serverTraceInfo.HasActivity}");
        _output.WriteLine($"Server TraceId: {serverTraceInfo.TraceId}");

        // Server should still create an activity (starting a new trace)
        Assert.True(serverTraceInfo.HasActivity, "Server should create an activity even when client has none");
        Assert.NotNull(serverTraceInfo.TraceId);
        Assert.NotEmpty(serverTraceInfo.TraceId);
    }
    finally
    {
        Activity.Current = previousActivity;
        PrintActivityDiagnostics();
    }
}

///
/// Verifies that multiple concurrent grain calls from the same client activity
/// all share the same TraceId.
///
[Fact]
[TestCategory("BVT")]
public async Task ConcurrentGrainCallsShareSameTraceId()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-concurrent-test");

    Assert.NotNull(clientActivity);
    var clientTraceId = clientActivity.TraceId.ToString();

    _output.WriteLine($"Client TraceId: {clientTraceId}");

    try
    {
        // Five calls, each to a randomly keyed grain, issued before any of them is awaited.
        var tasks = Enumerable.Range(0, 5)
            .Select(_ => _fixture.GrainFactory.GetGrain(Random.Shared.Next()).GetTraceContextInfo())
            .ToList();

        var results = await Task.WhenAll(tasks);

        foreach (var (result, index) in results.Select((r, i) => (r, i)))
        {
            _output.WriteLine($"Grain {index} TraceId: {result.TraceId}");

            Assert.True(result.HasActivity, $"Grain {index} should have an Activity.Current");
            Assert.Equal(clientTraceId, result.TraceId);
        }
    }
    finally
    {
        clientActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

///
/// EDGE CASE: Tests trace context propagation when the traceparent header contains an unexpected format.
/// The server should handle malformed headers gracefully and still create an activity.
///
[Fact]
[TestCategory("BVT")]
public async Task ServerHandlesMalformedTraceParentGracefully()
{
    Started.Clear();

    // Seed RequestContext with a traceparent value that is not in the expected format.
    RequestContext.Set("traceparent", "invalid-traceparent-value");

    try
    {
        var target = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var observed = await target.GetTraceContextInfo();

        _output.WriteLine($"Server HasActivity: {observed.HasActivity}");
        _output.WriteLine($"Server TraceId: {observed.TraceId}");
        _output.WriteLine($"Server TraceParentFromRequestContext: {observed.TraceParentFromRequestContext}");

        // The server is still expected to report an activity with a trace id.
        Assert.True(observed.HasActivity, "Server should create an activity even with malformed traceparent");
        Assert.NotNull(observed.TraceId);
    }
    finally
    {
        // Drop the seeded value before printing diagnostics.
        RequestContext.Clear();
        PrintActivityDiagnostics();
    }
}

///
/// Tests that the traceparent in RequestContext reflects the client's outgoing span,
/// not the original parent activity. This verifies proper span creation on the client side.
///
[Fact]
[TestCategory("BVT")]
public async Task TraceParentReflectsClientOutgoingSpan()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientParentActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-traceparent-reflection-test");

    Assert.NotNull(clientParentActivity);
    var clientTraceId = clientParentActivity.TraceId.ToString();
    var clientParentSpanId = clientParentActivity.SpanId.ToString();

    _output.WriteLine($"Client Parent TraceId: {clientTraceId}");
    _output.WriteLine($"Client Parent SpanId: {clientParentSpanId}");

    try
    {
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var serverTraceInfo = await grain.GetTraceContextInfo();

        _output.WriteLine($"Server TraceParentFromRequestContext: {serverTraceInfo.TraceParentFromRequestContext}");
        _output.WriteLine($"Server ParentSpanId: {serverTraceInfo.ParentSpanId}");

        Assert.NotNull(serverTraceInfo.TraceParentFromRequestContext);

        // The traceparent should contain the TraceId
        Assert.Contains(clientTraceId, serverTraceInfo.TraceParentFromRequestContext);

        // The server's parent span ID should NOT be the original client parent span ID
        // It should be the span ID of the client's outgoing call span
        // (This is because the client creates a new span for the outgoing call)
        Assert.NotEqual(clientParentSpanId, serverTraceInfo.ParentSpanId);

        // Find the client outgoing span
        var clientOutgoingSpan = Started
            .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
                                 && a.Kind == ActivityKind.Client
                                 && a.OperationName.Contains("GetTraceContextInfo"));

        Assert.NotNull(clientOutgoingSpan);

        // The server's parent span ID should match the client outgoing span ID
        Assert.Equal(clientOutgoingSpan.SpanId.ToString(), serverTraceInfo.ParentSpanId);

        // The traceparent should contain the client outgoing span ID
        Assert.Contains(clientOutgoingSpan.SpanId.ToString(), serverTraceInfo.TraceParentFromRequestContext);
    }
    finally
    {
        clientParentActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

/// <summary>
/// Tests trace context propagation when making calls from within a grain's OnActivateAsync.
/// This is a common edge case where activation might not have a proper trace context.
/// </summary>
[Fact]
[TestCategory("BVT")]
public async Task TraceContextIsPropagatedDuringActivation()
{
    Started.Clear();

    // StartActivity hands back an activity that is already started (or null when no listener
    // samples it), so it must not be started a second time.
    using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-activation-context-test");

    Assert.NotNull(clientActivity);
    var clientTraceId = clientActivity.TraceId.ToString();

    _output.WriteLine($"Client TraceId: {clientTraceId}");

    try
    {
        // Make a call that triggers activation
        var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
        var serverTraceInfo = await grain.GetTraceContextInfo();

        _output.WriteLine($"Server TraceId (during activation): {serverTraceInfo.TraceId}");

        // Verify the trace ID matches
        Assert.Equal(clientTraceId, serverTraceInfo.TraceId);

        // Find the activation span
        var activationSpan = Started
            .FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain);

        // The activation span is only checked when one was captured; its absence does not fail this test.
        if (activationSpan is not null)
        {
            _output.WriteLine($"Activation Span TraceId: {activationSpan.TraceId}");
            // The activation span should also share the same trace ID
            Assert.Equal(clientTraceId, activationSpan.TraceId.ToString());
        }
    }
    finally
    {
        clientActivity?.Stop();
        PrintActivityDiagnostics();
    }
}

///
/// Tests that tracestate header is properly propagated along with traceparent.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task TraceStateIsPropagatedWithTraceParent()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-tracestate-test");
+
+            Assert.NotNull(clientActivity);
+
+            // Set a tracestate on the client activity
+            clientActivity.TraceStateString = "vendor1=value1,vendor2=value2";
+
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+
+                // Only the spans captured by the listener are inspected; the returned info is not needed.
+                _ = await grain.GetTraceContextInfo();
+
+                // Find the server span
+                var serverSpan = Started
+                    .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
+                        && a.Kind == ActivityKind.Server
+                        && a.OperationName.Contains("GetTraceContextInfo"));
+
+                _output.WriteLine($"Server Span TraceState: {serverSpan?.TraceStateString ?? "(null)"}");
+
+                // The tracestate should be propagated to the server
+                // Note: This test may need adjustment based on how Orleans handles tracestate
+                // The assertion is skipped when no server span was captured or its tracestate is empty.
+                if (serverSpan is not null && !string.IsNullOrEmpty(serverSpan.TraceStateString))
+                {
+                    Assert.Contains("vendor1", serverSpan.TraceStateString);
+                }
+            }
+            finally
+            {
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// CRITICAL TEST: Verifies that when a grain call triggers activation, the activation span
+        /// shares the same TraceId as the grain call and is properly linked in the trace.
+        ///
+        /// This test reproduces the production issue where the activation span starts a new trace
+        /// instead of being part of the incoming grain call's trace.
+        ///
+        /// Expected trace structure:
+        /// client-parent-activity
+        /// └── ITraceContextPropagationGrain/GetTraceContextInfo (Client, outgoing)
+        /// └── ITraceContextPropagationGrain/GetTraceContextInfo (Server, incoming)
+        /// └── activate grain (should be linked to this trace!)
+        /// ├── register directory entry
+        /// └── execute OnActivateAsync
+        ///
+        /// Bug scenario (what we're testing for):
+        /// activate grain (NEW TRACE - disconnected from client!) <-- THIS IS THE BUG
+        /// ├── register directory entry
+        /// └── execute OnActivateAsync
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task ActivationSpanSharesTraceIdWithTriggeringGrainCall()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-activation-trace-test");
+
+            Assert.NotNull(clientActivity);
+            var clientTraceId = clientActivity.TraceId.ToString();
+
+            _output.WriteLine($"Client TraceId: {clientTraceId}");
+
+            try
+            {
+                // Use a unique grain ID to ensure we trigger a new activation
+                var uniqueGrainId = Random.Shared.Next();
+                var grain = _fixture.GrainFactory.GetGrain(uniqueGrainId);
+
+                // This call will trigger activation since it's a new grain
+                var serverTraceInfo = await grain.GetTraceContextInfo();
+
+                _output.WriteLine($"Server TraceId: {serverTraceInfo.TraceId}");
+
+                // Find the activation span
+                var activationSpan = Started
+                    .FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain);
+
+                _output.WriteLine($"Activation Span found: {activationSpan is not null}");
+                if (activationSpan is not null)
+                {
+                    _output.WriteLine($"Activation Span TraceId: {activationSpan.TraceId}");
+                    _output.WriteLine($"Activation Span ParentSpanId: {activationSpan.ParentSpanId}");
+                    _output.WriteLine($"Activation Span ParentId: {activationSpan.ParentId}");
+                }
+
+                // CRITICAL ASSERTION: Activation span must exist
+                Assert.NotNull(activationSpan);
+
+                // CRITICAL ASSERTION: Activation span must share the same TraceId as the client
+                // If this fails, the activation is starting a new trace instead of being part
+                // of the incoming grain call's trace!
+                Assert.Equal(clientTraceId, activationSpan.TraceId.ToString());
+
+                // CRITICAL ASSERTION: Activation span should have a parent (not be a root span)
+                // In the bug scenario, the activation span has no parent and starts a new trace
+                Assert.False(
+                    string.IsNullOrEmpty(activationSpan.ParentId),
+                    "Activation span should have a parent! If this fails, the activation is starting a new trace.");
+
+                // Verify the grain call span also shares the same trace ID
+                Assert.Equal(clientTraceId, serverTraceInfo.TraceId);
+            }
+            finally
+            {
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// Verifies that when activation is triggered by a grain call, the entire trace hierarchy is correct:
+        /// 1. Client outgoing span
+        /// 2. Server incoming grain call span (parented to client outgoing)
+        /// 3. Activation span (should share trace context with the grain call)
+        /// 4. OnActivateAsync span (parented to activation span)
+        ///
+        /// This test checks the full hierarchy to ensure no span is disconnected.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task FullTraceHierarchyIsCorrectWhenActivationTriggeredByGrainCall()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-hierarchy-test");
+
+            Assert.NotNull(clientActivity);
+            var clientTraceId = clientActivity.TraceId.ToString();
+            var clientSpanId = clientActivity.SpanId.ToString();
+
+            _output.WriteLine($"=== TEST START ===");
+            _output.WriteLine($"Client Activity TraceId: {clientTraceId}");
+            _output.WriteLine($"Client Activity SpanId: {clientSpanId}");
+
+            try
+            {
+                // Use a grain type that has OnActivateAsync to ensure we get all spans
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                _ = await grain.GetActivityId();
+
+                _output.WriteLine($"\n=== SPAN ANALYSIS ===");
+
+                // 1. Find client outgoing span
+                var clientOutgoingSpan = Started
+                    .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
+                        && a.Kind == ActivityKind.Client
+                        && a.OperationName.Contains("GetActivityId"));
+
+                _output.WriteLine($"\n1. Client Outgoing Span:");
+                if (clientOutgoingSpan is not null)
+                {
+                    _output.WriteLine($" TraceId: {clientOutgoingSpan.TraceId}");
+                    _output.WriteLine($" SpanId: {clientOutgoingSpan.SpanId}");
+                    _output.WriteLine($" ParentSpanId: {clientOutgoingSpan.ParentSpanId}");
+                    _output.WriteLine($" ParentId: {clientOutgoingSpan.ParentId}");
+                }
+                else
+                {
+                    _output.WriteLine($" NOT FOUND!");
+                }
+
+                // 2. Find server incoming span
+                var serverIncomingSpan = Started
+                    .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
+                        && a.Kind == ActivityKind.Server
+                        && a.OperationName.Contains("GetActivityId"));
+
+                _output.WriteLine($"\n2. Server Incoming Span:");
+                if (serverIncomingSpan is not null)
+                {
+                    _output.WriteLine($" TraceId: {serverIncomingSpan.TraceId}");
+                    _output.WriteLine($" SpanId: {serverIncomingSpan.SpanId}");
+                    _output.WriteLine($" ParentSpanId: {serverIncomingSpan.ParentSpanId}");
+                    _output.WriteLine($" ParentId: {serverIncomingSpan.ParentId}");
+                }
+                else
+                {
+                    _output.WriteLine($" NOT FOUND!");
+                }
+
+                // 3. Find activation span
+                var activationSpan = Started
+                    .FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain);
+
+                _output.WriteLine($"\n3. Activation Span:");
+                if (activationSpan is not null)
+                {
+                    _output.WriteLine($" TraceId: {activationSpan.TraceId}");
+                    _output.WriteLine($" SpanId: {activationSpan.SpanId}");
+                    _output.WriteLine($" ParentSpanId: {activationSpan.ParentSpanId}");
+                    _output.WriteLine($" ParentId: {activationSpan.ParentId ?? "(null - ROOT SPAN!)"}");
+                }
+                else
+                {
+                    _output.WriteLine($" NOT FOUND!");
+                }
+
+                // 4. Find OnActivateAsync span
+                var onActivateSpan = Started
+                    .FirstOrDefault(a => a.OperationName == ActivityNames.OnActivate);
+
+                _output.WriteLine($"\n4. OnActivateAsync Span:");
+                if (onActivateSpan is not null)
+                {
+                    _output.WriteLine($" TraceId: {onActivateSpan.TraceId}");
+                    _output.WriteLine($" SpanId: {onActivateSpan.SpanId}");
+                    _output.WriteLine($" ParentSpanId: {onActivateSpan.ParentSpanId}");
+                    _output.WriteLine($" ParentId: {onActivateSpan.ParentId}");
+                }
+                else
+                {
+                    _output.WriteLine($" NOT FOUND (grain may not implement IGrainBase)");
+                }
+
+                // ASSERTIONS
+                _output.WriteLine($"\n=== ASSERTIONS ===");
+
+                // All spans should share the same TraceId
+                Assert.NotNull(clientOutgoingSpan);
+                Assert.NotNull(serverIncomingSpan);
+                Assert.NotNull(activationSpan);
+
+                _output.WriteLine($"Checking all spans share TraceId {clientTraceId}...");
+
+                Assert.Equal(clientTraceId, clientOutgoingSpan.TraceId.ToString());
+                _output.WriteLine($" ✓ Client outgoing span has correct TraceId");
+
+                Assert.Equal(clientTraceId, serverIncomingSpan.TraceId.ToString());
+                _output.WriteLine($" ✓ Server incoming span has correct TraceId");
+
+                // THIS IS THE CRITICAL CHECK - activation span must share the trace ID!
+                Assert.Equal(clientTraceId, activationSpan.TraceId.ToString());
+                _output.WriteLine($" ✓ Activation span has correct TraceId");
+
+                // Client outgoing span should be parented to our test activity
+                Assert.Equal(clientSpanId, clientOutgoingSpan.ParentSpanId.ToString());
+                _output.WriteLine($" ✓ Client outgoing span is parented to test activity");
+
+                // Server incoming span should be parented to client outgoing span
+                Assert.Equal(clientOutgoingSpan.SpanId.ToString(), serverIncomingSpan.ParentSpanId.ToString());
+                _output.WriteLine($" ✓ Server incoming span is parented to client outgoing span");
+
+                // Activation span should have a parent (not be a root span)
+                Assert.False(
+                    string.IsNullOrEmpty(activationSpan.ParentId),
+                    "Activation span should not be a root span! This indicates broken trace context propagation.");
+                _output.WriteLine($" ✓ Activation span has a parent (not a root span)");
+
+                // The OnActivateAsync span is optional; it is only checked when one was captured.
+                if (onActivateSpan is not null)
+                {
+                    Assert.Equal(clientTraceId, onActivateSpan.TraceId.ToString());
+                    _output.WriteLine($" ✓ OnActivateAsync span has correct TraceId");
+
+                    Assert.Equal(activationSpan.SpanId.ToString(), onActivateSpan.ParentSpanId.ToString());
+                    _output.WriteLine($" ✓ OnActivateAsync span is parented to activation span");
+                }
+
+                _output.WriteLine($"\n=== ALL CHECKS PASSED ===");
+            }
+            finally
+            {
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// Diagnostic test that checks if the traceparent is properly present in the message's
+        /// RequestContextData when it reaches the server. This helps diagnose production issues
+        /// where trace context might not be propagating.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task DiagnoseTraceContextInRequestContextData()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-diagnostic-test");
+
+            Assert.NotNull(clientActivity);
+            var clientTraceId = clientActivity.TraceId.ToString();
+
+            _output.WriteLine($"=== DIAGNOSTIC TEST ===");
+            _output.WriteLine($"Client Activity TraceId: {clientTraceId}");
+            _output.WriteLine($"Client Activity SpanId: {clientActivity.SpanId}");
+
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                var serverTraceInfo = await grain.GetTraceContextInfo();
+
+                _output.WriteLine($"\n=== Server-side RequestContext Analysis ===");
+                _output.WriteLine($"TraceParent from RequestContext: {serverTraceInfo.TraceParentFromRequestContext ?? "(NULL or MISSING!)"}");
+                _output.WriteLine($"Server Activity TraceId: {serverTraceInfo.TraceId ?? "(NULL!)"}");
+                _output.WriteLine($"Server Activity ParentSpanId: {serverTraceInfo.ParentSpanId ?? "(NULL!)"}");
+                _output.WriteLine($"Server Activity HasRemoteParent: {serverTraceInfo.IsRemote}");
+
+                // DIAGNOSTIC ASSERTIONS
+                if (string.IsNullOrEmpty(serverTraceInfo.TraceParentFromRequestContext))
+                {
+                    _output.WriteLine("\n⚠️ WARNING: traceparent is NOT present in RequestContext!");
+                    _output.WriteLine("This would cause activation spans to start a new trace.");
+                    _output.WriteLine("Check that AddActivityPropagation() is called on both client and silo.");
+                }
+                else
+                {
+                    _output.WriteLine($"\n✓ traceparent IS present in RequestContext");
+
+                    // Parse and validate the traceparent format
+                    var parts = serverTraceInfo.TraceParentFromRequestContext.Split('-');
+                    if (parts.Length >= 3)
+                    {
+                        var version = parts[0];
+                        var traceId = parts[1];
+                        var parentId = parts[2];
+
+                        _output.WriteLine($" Version: {version}");
+                        _output.WriteLine($" TraceId: {traceId}");
+                        _output.WriteLine($" ParentSpanId: {parentId}");
+
+                        Assert.Equal(clientTraceId, traceId);
+                        _output.WriteLine($" ✓ TraceId matches client's TraceId");
+                    }
+                }
+
+                // The traceparent should be present
+                Assert.NotNull(serverTraceInfo.TraceParentFromRequestContext);
+                Assert.NotEmpty(serverTraceInfo.TraceParentFromRequestContext);
+                Assert.Contains(clientTraceId, serverTraceInfo.TraceParentFromRequestContext);
+            }
+            finally
+            {
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// CRITICAL TEST: Simulates a scenario where the calling code has no active Activity.
+        /// In this case, the ActivityPropagationOutgoingGrainCallFilter's StartActivity will return null
+        /// (because there's no parent activity and potentially no listener), and no traceparent will be injected.
+        ///
+        /// This test verifies that when a grain call is made without an active Activity,
+        /// the activation span will NOT have a parent and will start a new trace.
+        ///
+        /// This reproduces the production issue where:
+        /// - `activate grain` span has empty parentSpanId
+        /// - The trace appears disconnected from the originating call
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task ActivationStartsNewTraceWhenCallerHasNoActivity()
+        {
+            Started.Clear();
+
+            // Detach from any ambient activity for the duration of the test; it is put back in the finally block.
+            var ambientActivity = Activity.Current;
+            Activity.Current = null;
+
+            try
+            {
+                var currentIdText = Activity.Current?.Id ?? "(null)";
+                _output.WriteLine("=== TEST: Calling grain with NO Activity.Current ===");
+                _output.WriteLine($"Activity.Current before call: {currentIdText}");
+
+                var target = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                var info = await target.GetTraceContextInfo();
+
+                var traceParentText = info.TraceParentFromRequestContext ?? "(NULL - this is the bug!)";
+                _output.WriteLine("\n=== Server Response ===");
+                _output.WriteLine($"Server TraceParent from RequestContext: {traceParentText}");
+                _output.WriteLine($"Server HasActivity: {info.HasActivity}");
+                _output.WriteLine($"Server TraceId: {info.TraceId}");
+
+                // Look up the first captured span named after grain activation.
+                var activation = Started.FirstOrDefault(span => span.OperationName == ActivityNames.ActivateGrain);
+
+                _output.WriteLine("\n=== Activation Span Analysis ===");
+                if (activation is null)
+                {
+                    _output.WriteLine("Activation span NOT FOUND");
+                }
+                else
+                {
+                    var parentIdText = activation.ParentId ?? "(NULL - ROOT SPAN!)";
+                    _output.WriteLine($"Activation Span TraceId: {activation.TraceId}");
+                    _output.WriteLine($"Activation Span ParentId: {parentIdText}");
+                    _output.WriteLine($"Activation Span ParentSpanId: {activation.ParentSpanId}");
+
+                    // A missing parent means the activation span is a root span, which is the
+                    // shape seen in the production trace this test documents.
+                    var isRootSpan = string.IsNullOrEmpty(activation.ParentId);
+                    if (isRootSpan)
+                    {
+                        _output.WriteLine("\n⚠️ EXPECTED BEHAVIOR: Activation span is a ROOT span (no parent)");
+                        _output.WriteLine("This matches the production issue where activate grain has empty parentSpanId");
+                    }
+                }
+
+                // This test only reports what it observes; it makes no assertions.
+                var traceParentMissing = string.IsNullOrEmpty(info.TraceParentFromRequestContext);
+                if (traceParentMissing)
+                {
+                    _output.WriteLine("\n✓ CONFIRMED: traceparent is NOT in RequestContext when caller has no Activity");
+                    _output.WriteLine("This causes the activation span to start a new trace (no parent)");
+                }
+            }
+            finally
+            {
+                Activity.Current = ambientActivity;
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// Tests what happens when a grain call is made from within another grain that has no activity context.
+        /// This simulates scenarios like:
+        /// - Grain timers triggering calls
+        /// - Reminder callbacks making grain calls
+        /// - Stream subscription handlers
+        /// - Background processing in grains
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task GrainToGrainCallWithoutActivityContext()
+        {
+            Started.Clear();
+
+            // Remember the ambient activity so it can be restored after the test clears Activity.Current.
+            var previousActivity = Activity.Current;
+
+            // First, activate a grain that will make a nested call
+            // We start with an activity to ensure the first grain is activated with proper context
+            // (StartActivity returns an already-started activity, so Start() is not called again.)
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("setup-activity");
+
+            var callerGrain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+
+            // First call to ensure the grain is activated
+            _ = await callerGrain.GetTraceContextInfo();
+
+            clientActivity?.Stop();
+            Activity.Current = null;
+            Started.Clear();
+
+            _output.WriteLine("=== TEST: Grain-to-grain call with no ambient activity ===");
+
+            try
+            {
+                // Now make another call - the grain is already activated
+                // But if this call triggers a nested call within the grain,
+                // and there's no Activity.Current, the nested call won't have trace context
+                var result = await callerGrain.GetNestedTraceContextInfo();
+
+                _output.WriteLine($"Caller grain TraceId: {result.Local.TraceId}");
+                _output.WriteLine($"Nested grain TraceId: {result.Nested.TraceId}");
+
+                // Both should have activities (server creates one even without traceparent)
+                Assert.True(result.Local.HasActivity);
+                Assert.True(result.Nested.HasActivity);
+
+                // But they should share the same trace because grain-to-grain calls preserve context
+                Assert.Equal(result.Local.TraceId, result.Nested.TraceId);
+            }
+            finally
+            {
+                // Undo the Activity.Current reset made above.
+                Activity.Current = previousActivity;
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// CRITICAL TEST: This test reproduces the exact production issue where:
+        /// - OpenTelemetry is configured to listen to Microsoft.Orleans.Lifecycle (for activation spans)
+        /// - But NOT listening to Microsoft.Orleans.Application (for grain call spans)
+        ///
+        ///
When this happens:
+        /// 1. Client makes a grain call
+        /// 2. ActivityPropagationOutgoingGrainCallFilter tries to start an activity on ApplicationGrainSource
+        /// 3. StartActivity returns NULL because there's no listener for that source
+        /// 4. No traceparent is injected into RequestContext
+        /// 5. Server receives message with no traceparent
+        /// 6. Catalog.GetOrCreateActivation starts activation span with no parent
+        /// 7. The activation span becomes a ROOT span (disconnected from the original trace)
+        ///
+        /// Expected trace in production (BUG):
+        /// activate grain (NO PARENT - starts new trace!)
+        /// ├── register directory entry
+        /// └── execute OnActivateAsync
+        ///
+        /// This test verifies that if Application source isn't being sampled,
+        /// the activation span will have no parent.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task ActivationSpanHasNoParentWhenApplicationSourceNotSampled()
+        {
+            // This test documents the expected behavior when only Lifecycle source is sampled
+            // In our test fixture, we DO sample Application source, so this test verifies correct behavior
+            // In production, if Application source is NOT sampled, the activation will be a root span
+
+            Started.Clear();
+
+            // StartActivity returns an already-started activity (or null when the source is not
+            // sampled), so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("client-sampling-test");
+
+            _output.WriteLine("=== TEST: Verifying sampling behavior ===");
+            _output.WriteLine($"Client Activity created: {clientActivity is not null}");
+            _output.WriteLine($"Client Activity ID: {clientActivity?.Id}");
+
+            if (clientActivity is null)
+            {
+                _output.WriteLine("\n⚠️ CLIENT ACTIVITY IS NULL!");
+                _output.WriteLine("This means Microsoft.Orleans.Application source is NOT being sampled.");
+                _output.WriteLine("The ActivityPropagationOutgoingGrainCallFilter will NOT inject traceparent.");
+                _output.WriteLine("This causes activation spans to start new traces (no parent).");
+            }
+
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                var serverTraceInfo = await grain.GetTraceContextInfo();
+
+                _output.WriteLine($"\n=== Results ===");
+                _output.WriteLine($"Server has traceparent in RequestContext: {!string.IsNullOrEmpty(serverTraceInfo.TraceParentFromRequestContext)}");
+                _output.WriteLine($"Server TraceParent: {serverTraceInfo.TraceParentFromRequestContext ?? "(NULL)"}");
+
+                // Find spans by source
+                var applicationSpans = Started.Where(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName).ToList();
+                var lifecycleSpans = Started.Where(a => a.Source.Name == ActivitySources.LifecycleActivitySourceName).ToList();
+
+                _output.WriteLine($"\n=== Spans by Source ===");
+                _output.WriteLine($"Microsoft.Orleans.Application spans: {applicationSpans.Count}");
+                foreach (var span in applicationSpans)
+                {
+                    _output.WriteLine($" - {span.OperationName} (Kind: {span.Kind})");
+                }
+                _output.WriteLine($"Microsoft.Orleans.Lifecycle spans: {lifecycleSpans.Count}");
+                foreach (var span in lifecycleSpans)
+                {
+                    _output.WriteLine($" - {span.OperationName} (ParentId: {span.ParentId ?? "NULL - ROOT"})");
+                }
+
+                // Find the client outgoing span
+                var clientOutgoingSpan = Started
+                    .FirstOrDefault(a => a.Source.Name == ActivitySources.ApplicationGrainActivitySourceName
+                        && a.Kind == ActivityKind.Client);
+
+                _output.WriteLine($"\n=== Key Finding ===");
+                if (clientOutgoingSpan is null && applicationSpans.Count == 0)
+                {
+                    _output.WriteLine("⚠️ No Application source spans found!");
+                    _output.WriteLine("If this happens in production (no listener for Application source),");
+                    _output.WriteLine("the activation span will have no parent.");
+                }
+
+                // In our test environment, Application source IS sampled, so we should have spans
+                Assert.NotNull(clientActivity);
+                Assert.True(applicationSpans.Count > 0,
+                    "Application source spans should exist. In production, verify Microsoft.Orleans.Application is in your TracerProvider sources.");
+            }
+            finally
+            {
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// Diagnostic test that checks if all Orleans ActivitySources are being properly sampled.
+        /// Run this to verify your OpenTelemetry configuration includes all necessary sources.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task DiagnoseActivitySourceSampling()
+        {
+            Started.Clear();
+
+            _output.WriteLine("=== Checking ActivitySource Sampling ===\n");
+
+            // Test each Orleans ActivitySource. The printed name is read from the source itself
+            // instead of being repeated as a string literal next to it.
+            var sourcesToTest = new[]
+            {
+                ActivitySources.ApplicationGrainSource,
+                ActivitySources.RuntimeGrainSource,
+                ActivitySources.LifecycleGrainSource,
+                ActivitySources.StorageGrainSource,
+            };
+
+            foreach (var source in sourcesToTest)
+            {
+                // StartActivity returns null when no listener samples the source; disposing a
+                // non-null activity at the end of the iteration stops it.
+                using var testActivity = source.StartActivity($"test-{source.Name}", ActivityKind.Internal);
+                var isSampled = testActivity is not null;
+                _output.WriteLine($"{source.Name}: {(isSampled ? "✓ SAMPLED" : "✗ NOT SAMPLED")}");
+            }
+
+            _output.WriteLine("\n=== Implications ===");
+            _output.WriteLine("For proper trace propagation, you need to sample:");
+            _output.WriteLine(" - Microsoft.Orleans.Application (for grain call spans)");
+            _output.WriteLine(" - Microsoft.Orleans.Lifecycle (for activation/deactivation spans)");
+            _output.WriteLine("\nIf Application is NOT sampled but Lifecycle IS:");
+            _output.WriteLine(" - Grain call spans won't be created");
+            _output.WriteLine(" - traceparent won't be injected into messages");
+            _output.WriteLine(" - Activation spans will start new traces (no parent)");
+
+            // Make an actual grain call to verify end-to-end
+            _output.WriteLine("\n=== End-to-End Test ===");
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("e2e-test");
+
+            try
+            {
+                if (clientActivity is null)
+                {
+                    _output.WriteLine("⚠️ Could not create client activity - Application source not sampled!");
+                }
+                else
+                {
+                    var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                    var info = await grain.GetTraceContextInfo();
+
+                    _output.WriteLine($"Server received traceparent: {!string.IsNullOrEmpty(info.TraceParentFromRequestContext)}");
+
+                    var activationSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain);
+                    if (activationSpan is not null)
+                    {
+                        _output.WriteLine($"Activation span has parent: {!string.IsNullOrEmpty(activationSpan.ParentId)}");
+                        if (string.IsNullOrEmpty(activationSpan.ParentId))
+                        {
+                            _output.WriteLine("⚠️ BUG: Activation span is a root span!");
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                // Stop the client span and print diagnostics even when the grain call throws.
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// CRITICAL TEST: Simulates a call from an ASP.NET HTTP request handler.
+        /// In production, grain calls often originate from HTTP endpoints where:
+        /// 1. ASP.NET creates an HTTP activity (e.g., "GET /api/users/{id}")
+        /// 2. The controller calls a grain
+        /// 3.
Orleans should propagate the HTTP trace context to the grain
+        ///
+        /// This test verifies that if Activity.Current exists (from HTTP/ASP.NET),
+        /// Orleans properly propagates it to the grain call and activation spans.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task TraceContextPropagatedFromHttpActivityToGrainCall()
+        {
+            Started.Clear();
+
+            // Simulate an ASP.NET HTTP activity (different source than Orleans)
+            using var httpActivitySource = new ActivitySource("Microsoft.AspNetCore", "1.0.0");
+
+            // The listener is disposed together with the test so that it does not stay registered
+            // with ActivitySource after the test has finished.
+            using var httpListener = new ActivityListener
+            {
+                ShouldListenTo = src => src.Name == "Microsoft.AspNetCore",
+                Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllDataAndRecorded,
+            };
+            ActivitySource.AddActivityListener(httpListener);
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var httpActivity = httpActivitySource.StartActivity("GET /api/users/{id}", ActivityKind.Server);
+
+            Assert.NotNull(httpActivity);
+            var httpTraceId = httpActivity.TraceId.ToString();
+
+            _output.WriteLine("=== TEST: HTTP to Grain call trace propagation ===");
+            _output.WriteLine($"HTTP Activity TraceId: {httpTraceId}");
+            _output.WriteLine($"HTTP Activity SpanId: {httpActivity.SpanId}");
+            _output.WriteLine($"Activity.Current: {Activity.Current?.Id}");
+
+            try
+            {
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                var serverTraceInfo = await grain.GetTraceContextInfo();
+
+                _output.WriteLine($"\n=== Server Response ===");
+                _output.WriteLine($"Server TraceId: {serverTraceInfo.TraceId}");
+                _output.WriteLine($"Server TraceParent: {serverTraceInfo.TraceParentFromRequestContext ?? "(NULL)"}");
+                _output.WriteLine($"Server HasActivity: {serverTraceInfo.HasActivity}");
+
+                // Find the activation span
+                var activationSpan = Started
+                    .FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain);
+
+                _output.WriteLine($"\n=== Activation Span ===");
+                if (activationSpan is not null)
+                {
+                    _output.WriteLine($"TraceId: {activationSpan.TraceId}");
+                    _output.WriteLine($"ParentId: {activationSpan.ParentId ?? "(NULL - ROOT!)"}");
+                }
+
+                // Server should have the same TraceId as the HTTP activity
+                Assert.Equal(httpTraceId, serverTraceInfo.TraceId);
+
+                // Activation span should also share the same TraceId
+                if (activationSpan is not null)
+                {
+                    Assert.Equal(httpTraceId, activationSpan.TraceId.ToString());
+                    Assert.False(string.IsNullOrEmpty(activationSpan.ParentId),
+                        "Activation span should have a parent when called from HTTP activity");
+                }
+            }
+            finally
+            {
+                httpActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// DIAGNOSTIC TEST: Forces grain activation on a specific (different) silo to test cross-silo
+        /// trace context propagation. This simulates production scenarios where the placement
+        /// director places the grain on a different silo than the one receiving the initial call.
+        ///
+        /// In production with AddDistributedGrainDirectory():
+        /// 1. Client sends call to Silo A
+        /// 2. Placement decides grain should be on Silo B
+        /// 3. Message is forwarded to Silo B
+        /// 4. Silo B creates the activation
+        ///
+        /// The question: Is trace context preserved through this forwarding?
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task TraceContextPreservedWhenGrainPlacedOnDifferentSilo()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("cross-silo-test");
+
+            Assert.NotNull(clientActivity);
+            var clientTraceId = clientActivity.TraceId.ToString();
+
+            _output.WriteLine("=== TEST: Cross-silo grain activation ===");
+            _output.WriteLine($"Client TraceId: {clientTraceId}");
+
+            // Get the silos
+            var silos = _fixture.HostedCluster.GetActiveSilos().ToList();
+            _output.WriteLine($"Active silos: {silos.Count}");
+            foreach (var silo in silos)
+            {
+                _output.WriteLine($" - {silo.SiloAddress}");
+            }
+
+            // With fewer than two silos the test returns without asserting anything.
+            if (silos.Count < 2)
+            {
+                _output.WriteLine("⚠️ Need at least 2 silos for this test");
+                return;
+            }
+
+            try
+            {
+                // Use placement hint to force grain onto a specific silo
+                var targetSilo = silos[1].SiloAddress; // Use second silo
+                RequestContext.Set(IPlacementDirector.PlacementHintKey, targetSilo);
+                _output.WriteLine($"Placement hint set to: {targetSilo}");
+
+                // This grain uses RandomPlacement, so the hint should work
+                var grain = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                var serverTraceInfo = await grain.GetTraceContextInfo();
+
+                _output.WriteLine($"\n=== Results ===");
+                _output.WriteLine($"Server TraceId: {serverTraceInfo.TraceId}");
+                _output.WriteLine($"Server TraceParent: {serverTraceInfo.TraceParentFromRequestContext ?? "(NULL!)"}");
+                _output.WriteLine($"Server HasActivity: {serverTraceInfo.HasActivity}");
+
+                // Find the activation span
+                var activationSpan = Started.FirstOrDefault(a => a.OperationName == ActivityNames.ActivateGrain);
+                if (activationSpan is not null)
+                {
+                    _output.WriteLine($"\nActivation Span:");
+                    _output.WriteLine($" TraceId: {activationSpan.TraceId}");
+                    _output.WriteLine($" ParentId: {activationSpan.ParentId ?? "(NULL - ROOT!)"}");
+                    _output.WriteLine($" HasRemoteParent: {activationSpan.HasRemoteParent}");
+                }
+
+                // CRITICAL: Even with cross-silo placement, trace should be preserved
+                Assert.Equal(clientTraceId, serverTraceInfo.TraceId);
+
+                if (activationSpan is not null)
+                {
+                    Assert.Equal(clientTraceId, activationSpan.TraceId.ToString());
+                    Assert.False(string.IsNullOrEmpty(activationSpan.ParentId),
+                        "Activation span should have a parent even with cross-silo placement");
+                }
+            }
+            finally
+            {
+                RequestContext.Clear();
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        ///
+        /// DIAGNOSTIC TEST: Tests trace context when a grain-to-grain call crosses silo boundaries.
+        /// This is common in production where Grain A on Silo 1 calls Grain B on Silo 2.
+        ///
+        [Fact]
+        [TestCategory("BVT")]
+        public async Task TraceContextPreservedInCrossSiloGrainToGrainCall()
+        {
+            Started.Clear();
+
+            // StartActivity returns an already-started activity, so Start() is not called again.
+            using var clientActivity = ActivitySources.ApplicationGrainSource.StartActivity("cross-silo-g2g-test");
+
+            Assert.NotNull(clientActivity);
+            var clientTraceId = clientActivity.TraceId.ToString();
+
+            // With fewer than two silos the test returns without asserting anything.
+            var silos = _fixture.HostedCluster.GetActiveSilos().ToList();
+            if (silos.Count < 2)
+            {
+                _output.WriteLine("⚠️ Need at least 2 silos for this test");
+                return;
+            }
+
+            _output.WriteLine("=== TEST: Cross-silo grain-to-grain call ===");
+            _output.WriteLine($"Client TraceId: {clientTraceId}");
+
+            try
+            {
+                // Place first grain on silo 1
+                RequestContext.Set(IPlacementDirector.PlacementHintKey, silos[0].SiloAddress);
+                var grain1 = _fixture.GrainFactory.GetGrain(Random.Shared.Next());
+                _ = await grain1.GetTraceContextInfo(); // Activate on silo 1
+                RequestContext.Clear();
+
+                // Now make a nested call - the nested grain should go to silo 2
+                RequestContext.Set(IPlacementDirector.PlacementHintKey, silos[1].SiloAddress);
+                var (local, nested) = await grain1.GetNestedTraceContextInfo();
+                RequestContext.Clear();
+
+                _output.WriteLine($"\n=== Results ===");
+                _output.WriteLine($"Local grain TraceId: {local.TraceId}");
+                _output.WriteLine($"Nested grain TraceId: {nested.TraceId}");
+                _output.WriteLine($"Nested grain HasRemoteParent: {nested.IsRemote}");
+
+                // Both should share same trace ID
+                Assert.Equal(clientTraceId, local.TraceId);
+                Assert.Equal(clientTraceId, nested.TraceId);
+            }
+            finally
+            {
+                RequestContext.Clear();
+                clientActivity?.Stop();
+                PrintActivityDiagnostics();
+            }
+        }
+
+        // Writes every activity currently held in Started to the test output, ordered by start
+        // time, or a single "No activities captured." line when there are none.
+        private void PrintActivityDiagnostics()
+        {
+            var activities = Started.ToList();
+            if (activities.Count == 0)
+            {
+                _output.WriteLine("No activities captured.");
+                return;
+            }
+
+            var sb = new StringBuilder();
+            sb.AppendLine();
+            sb.AppendLine("=== CAPTURED ACTIVITIES ===");
+            sb.AppendLine($"Total: {activities.Count}");
+            sb.AppendLine();
+
+            foreach (var activity in activities.OrderBy(a => a.StartTimeUtc))
+            {
+                sb.AppendLine($"[{activity.Source.Name}] {activity.OperationName}");
+                sb.AppendLine($" ID: {activity.Id}");
+                sb.AppendLine($" TraceId: {activity.TraceId}");
+                sb.AppendLine($" SpanId: {activity.SpanId}");
+                sb.AppendLine($" ParentSpanId: {activity.ParentSpanId}");
+                sb.AppendLine($" ParentId: {activity.ParentId}");
+                sb.AppendLine($" Kind: {activity.Kind}");
+                sb.AppendLine($" HasRemoteParent: {activity.HasRemoteParent}");
+                sb.AppendLine();
+            }
+
+            _output.WriteLine(sb.ToString());
+        }
+    }
+}
+
diff --git a/test/Tester/ActivityPropagationTests.cs b/test/Tester/ActivityPropagationTests.cs index 8fe2285ce26..01af15e2a56 100644 --- a/test/Tester/ActivityPropagationTests.cs +++ b/test/Tester/ActivityPropagationTests.cs @@ -1,6 +1,6 @@ using System.Diagnostics; using Microsoft.Extensions.Configuration; -using Orleans.Runtime; +using Orleans.Diagnostics; using Orleans.TestingHost; using TestExtensions; using UnitTests.GrainInterfaces; @@ -31,7 +31,7 @@ static ActivityPropagationTests() // This listener specifically targets activities created by Orleans for grain calls Listener = new() { - ShouldListenTo = p =>
p.Name == ActivityPropagationGrainCallFilter.ApplicationGrainActivitySourceName, + ShouldListenTo = p => p.Name == ActivitySources.ApplicationGrainActivitySourceName, Sample = Sample, SampleUsingParentId = SampleUsingParentId, };