diff --git a/Orleans.slnx b/Orleans.slnx
index 72c928450cf..7daec69935d 100644
--- a/Orleans.slnx
+++ b/Orleans.slnx
@@ -156,6 +156,7 @@
+
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShard.Log.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShard.Log.cs
deleted file mode 100644
index cd2ba74ec2a..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShard.Log.cs
+++ /dev/null
@@ -1,109 +0,0 @@
-using System;
-using Microsoft.Extensions.Logging;
-
-namespace Orleans.DurableJobs.AzureStorage;
-
-internal sealed partial class AzureStorageJobShard
-{
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Initializing shard '{ShardId}' from Azure Storage blob"
- )]
- private static partial void LogInitializingShard(ILogger logger, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Shard '{ShardId}' initialized successfully. Loaded {JobCount} job(s) in {ElapsedMilliseconds}ms"
- )]
- private static partial void LogShardInitialized(ILogger logger, string shardId, int jobCount, long elapsedMilliseconds);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Adding job '{JobId}' (Name: '{JobName}') to shard '{ShardId}' with due time {DueTime}"
- )]
- private static partial void LogAddingJob(ILogger logger, string jobId, string jobName, string shardId, DateTimeOffset dueTime);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Removing job '{JobId}' from shard '{ShardId}'"
- )]
- private static partial void LogRemovingJob(ILogger logger, string jobId, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Retrying job '{JobId}' in shard '{ShardId}' with new due time {NewDueTime}"
- )]
- private static partial void LogRetryingJob(ILogger logger, string jobId, string shardId, DateTimeOffset newDueTime);
-
- [LoggerMessage(
- Level = LogLevel.Trace,
- Message = "Flushing batch of {OperationCount} job operation(s) to shard '{ShardId}'"
- )]
- private static partial void LogFlushingBatch(ILogger logger, int operationCount, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Batch of {OperationCount} job operation(s) written to shard '{ShardId}' in {ElapsedMilliseconds}ms. Total committed blocks: {CommittedBlockCount}"
- )]
- private static partial void LogBatchWritten(ILogger logger, int operationCount, string shardId, long elapsedMilliseconds, int committedBlockCount);
-
- [LoggerMessage(
- Level = LogLevel.Trace,
- Message = "Updating metadata for shard '{ShardId}'"
- )]
- private static partial void LogUpdatingMetadata(ILogger logger, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Metadata updated for shard '{ShardId}'"
- )]
- private static partial void LogMetadataUpdated(ILogger logger, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Shard '{ShardId}' has {CommittedBlockCount} committed blocks, approaching Azure Blob append limit of 50,000"
- )]
- private static partial void LogApproachingBlockLimit(ILogger logger, string shardId, int committedBlockCount);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Large batch detected for shard '{ShardId}': {OperationCount} operations (max configured: {MaxBatchSize})"
- )]
- private static partial void LogLargeBatch(ILogger logger, string shardId, int operationCount, int maxBatchSize);
-
- [LoggerMessage(
- Level = LogLevel.Error,
- Message = "Error writing batch of {OperationCount} operation(s) to shard '{ShardId}'"
- )]
- private static partial void LogErrorWritingBatch(ILogger logger, Exception exception, int operationCount, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Error,
- Message = "Error updating metadata for shard '{ShardId}'"
- )]
- private static partial void LogErrorUpdatingMetadata(ILogger logger, Exception exception, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Stopping storage processor for shard '{ShardId}'"
- )]
- private static partial void LogStoppingProcessor(ILogger logger, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Storage processor stopped for shard '{ShardId}'"
- )]
- private static partial void LogProcessorStopped(ILogger logger, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Trace,
- Message = "Processing storage operation queue for shard '{ShardId}'"
- )]
- private static partial void LogProcessingStorageQueue(ILogger logger, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Waiting for additional operations to batch (current size: {CurrentSize}, min size: {MinSize}) for shard '{ShardId}'"
- )]
- private static partial void LogWaitingForBatch(ILogger logger, int currentSize, int minSize, string shardId);
-}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShard.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShard.cs
deleted file mode 100644
index c5184ccc911..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShard.cs
+++ /dev/null
@@ -1,394 +0,0 @@
-using System;
-using System.Buffers;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.IO;
-using System.Text;
-using System.Text.Json;
-using System.Threading;
-using System.Threading.Channels;
-using System.Threading.Tasks;
-using System.Transactions;
-using Azure;
-using Azure.Storage.Blobs;
-using Azure.Storage.Blobs.Models;
-using Azure.Storage.Blobs.Specialized;
-using Microsoft.Extensions.Logging;
-using Orleans.Hosting;
-using Orleans.Runtime;
-using Orleans.Serialization.Buffers.Adaptors;
-
-namespace Orleans.DurableJobs.AzureStorage;
-
-internal sealed partial class AzureStorageJobShard : JobShard
-{
- private readonly Channel _storageOperationChannel;
- private readonly Task _storageProcessorTask;
- private readonly CancellationTokenSource _shutdownCts = new();
- private readonly AzureStorageJobShardOptions _options;
- private readonly ILogger _logger;
-
- internal AppendBlobClient BlobClient { get; init; }
- internal ETag? ETag { get; private set; }
- internal int CommitedBlockCount { get; private set; }
-
- public AzureStorageJobShard(string id, DateTimeOffset startTime, DateTimeOffset endTime, AppendBlobClient blobClient, IDictionary? metadata, ETag? eTag, AzureStorageJobShardOptions options, ILogger logger)
- : base(id, startTime, endTime)
- {
- BlobClient = blobClient;
- ETag = eTag;
- Metadata = metadata;
- _options = options;
- _logger = logger;
-
- // Create unbounded channel for storage operations
- _storageOperationChannel = Channel.CreateUnbounded(new UnboundedChannelOptions
- {
- SingleReader = true,
- SingleWriter = false
- });
-
- // Start the background task that processes storage operations
- _storageProcessorTask = ProcessStorageOperationsAsync();
- }
-
- protected override async Task PersistAddJobAsync(string jobId, string jobName, DateTimeOffset dueTime, GrainId target, IReadOnlyDictionary? metadata, CancellationToken cancellationToken)
- {
- LogAddingJob(_logger, jobId, jobName, Id, dueTime);
- var operation = JobOperation.CreateAddOperation(jobId, jobName, dueTime, target, metadata);
- await EnqueueStorageOperationAsync(StorageOperation.CreateAppendOperation(operation), cancellationToken);
- }
-
- protected override async Task PersistRemoveJobAsync(string jobId, CancellationToken cancellationToken)
- {
- LogRemovingJob(_logger, jobId, Id);
- var operation = JobOperation.CreateRemoveOperation(jobId);
- await EnqueueStorageOperationAsync(StorageOperation.CreateAppendOperation(operation), cancellationToken);
- }
-
- protected override async Task PersistRetryJobAsync(string jobId, DateTimeOffset newDueTime, CancellationToken cancellationToken)
- {
- LogRetryingJob(_logger, jobId, Id, newDueTime);
- var operation = JobOperation.CreateRetryOperation(jobId, newDueTime);
- await EnqueueStorageOperationAsync(StorageOperation.CreateAppendOperation(operation), cancellationToken);
- }
-
- public async Task UpdateBlobMetadata(IDictionary metadata, CancellationToken cancellationToken)
- {
- LogUpdatingMetadata(_logger, Id);
- await EnqueueStorageOperationAsync(StorageOperation.CreateMetadataOperation(metadata), cancellationToken);
- }
-
- public async ValueTask InitializeAsync(CancellationToken cancellationToken)
- {
- LogInitializingShard(_logger, Id);
- var sw = Stopwatch.StartNew();
-
- // Load existing blob
- var response = await BlobClient.DownloadAsync(cancellationToken: cancellationToken);
- using var stream = response.Value.Content;
-
- // Rebuild state by replaying operations
- var addedJobs = new Dictionary();
- var deletedJobs = new HashSet();
- var jobRetryCounters = new Dictionary();
-
- await foreach (var operation in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, cancellationToken))
- {
- switch (operation.Type)
- {
- case JobOperation.OperationType.Add:
- if (!deletedJobs.Contains(operation.Id))
- {
- addedJobs[operation.Id] = operation;
- }
- break;
- case JobOperation.OperationType.Remove:
- deletedJobs.Add(operation.Id);
- addedJobs.Remove(operation.Id);
- jobRetryCounters.Remove(operation.Id);
- break;
- case JobOperation.OperationType.Retry:
- if (!deletedJobs.Contains(operation.Id))
- {
- if (!jobRetryCounters.ContainsKey(operation.Id))
- {
- jobRetryCounters[operation.Id] = (1, operation.DueTime);
- }
- else
- {
- var entry = jobRetryCounters[operation.Id];
- jobRetryCounters[operation.Id] = (entry.dequeueCount + 1, operation.DueTime);
- }
- }
- break;
- }
- }
-
- // Rebuild the priority queue
- foreach (var op in addedJobs.Values)
- {
- var retryCounter = 0;
- var dueTime = op.DueTime!.Value;
- if (jobRetryCounters.TryGetValue(op.Id, out var retryEntries))
- {
- retryCounter = retryEntries.dequeueCount;
- dueTime = retryEntries.newDueTime ?? dueTime;
- }
-
- EnqueueJob(new DurableJob
- {
- Id = op.Id,
- Name = op.Name!,
- DueTime = dueTime,
- TargetGrainId = op.TargetGrainId!.Value,
- ShardId = Id,
- Metadata = op.Metadata,
- },
- retryCounter);
- }
-
- ETag = response.Value.Details.ETag;
-
- sw.Stop();
- LogShardInitialized(_logger, Id, addedJobs.Count, sw.ElapsedMilliseconds);
- }
-
- private async Task EnqueueStorageOperationAsync(StorageOperation operation, CancellationToken cancellationToken)
- {
- await _storageOperationChannel.Writer.WriteAsync(operation, cancellationToken);
- await operation.CompletionSource.Task;
- }
-
- private async Task ProcessStorageOperationsAsync()
- {
- await Task.CompletedTask.ConfigureAwait(ConfigureAwaitOptions.ContinueOnCapturedContext | ConfigureAwaitOptions.ForceYielding);
-
- var cancellationToken = _shutdownCts.Token;
- // TODO: AppendBlob has a limit of 50,000 blocks. Implement blob rotation when this limit is approached.
- var batchOperations = new List(_options.MaxBatchSize);
-
- try
- {
- while (await _storageOperationChannel.Reader.WaitToReadAsync(cancellationToken))
- {
- // Read first operation
- if (!_storageOperationChannel.Reader.TryRead(out var firstOperation))
- {
- continue;
- }
-
- // Handle metadata operations immediately (cannot be batched)
- if (firstOperation.Type is StorageOperationType.UpdateMetadata)
- {
- try
- {
- await UpdateMetadataAsync(firstOperation.Metadata!, cancellationToken);
- LogMetadataUpdated(_logger, Id);
- firstOperation.CompletionSource.TrySetResult();
- }
- catch (Exception ex)
- {
- LogErrorUpdatingMetadata(_logger, ex, Id);
- firstOperation.CompletionSource?.TrySetException(ex);
- }
- continue;
- }
-
- // Collect job operations for batching
- batchOperations.Add(firstOperation);
-
- // Try to collect more operations up to the maximum batch size
- if (TryCollectJobOperationsForBatch(batchOperations))
- {
- // Not enough operations to meet the minimum batch size, wait for more or timeout
- if (batchOperations.Count < _options.MinBatchSize)
- {
- LogWaitingForBatch(_logger, batchOperations.Count, _options.MinBatchSize, Id);
- }
- await Task.Delay(_options.BatchFlushInterval, cancellationToken);
- TryCollectJobOperationsForBatch(batchOperations);
- }
-
- // Process the batch of job operations
- if (batchOperations.Count > 0)
- {
- try
- {
- LogFlushingBatch(_logger, batchOperations.Count, Id);
- await AppendJobOperationBatchAsync(batchOperations, cancellationToken);
-
- // Mark all operations as completed
- foreach (var op in batchOperations)
- {
- op.CompletionSource.TrySetResult();
- }
- }
- catch (Exception ex)
- {
- LogErrorWritingBatch(_logger, ex, batchOperations.Count, Id);
-
- // Mark all operations as failed
- foreach (var op in batchOperations)
- {
- op.CompletionSource?.TrySetException(ex);
- }
- }
- finally
- {
- batchOperations.Clear();
- }
- }
- }
- }
- catch (OperationCanceledException)
- {
- // Ignore
- }
- finally
- {
- // Expected during shutdown - cancel all pending operations
- while (_storageOperationChannel.Reader.TryRead(out var operation))
- {
- operation.CompletionSource?.TrySetCanceled(cancellationToken);
- }
- }
-
- // Local function to collect job operations for batching. Returns true if more operations can be collected.
- bool TryCollectJobOperationsForBatch(List batchOperations)
- {
- // Collect more jobs, up to a maximum batch size
- while (batchOperations.Count < _options.MaxBatchSize && _storageOperationChannel.Reader.TryPeek(out var nextOperation))
- {
- if (nextOperation.Type is StorageOperationType.UpdateMetadata)
- {
- // Stop batching if we encounter a metadata operation
- return false;
- }
- _storageOperationChannel.Reader.TryRead(out var operation);
- Debug.Assert(operation != null);
- batchOperations.Add(operation!);
- }
- return batchOperations.Count != _options.MaxBatchSize;
- }
- }
-
- private async Task AppendJobOperationBatchAsync(List operations, CancellationToken cancellationToken)
- {
- var sw = Stopwatch.StartNew();
- using var stream = PooledBufferStream.Rent();
- try
- {
- stream.Position = 0; // TODO Remove that once PooledBufferStream fixed
-
- // Encode all job operations into a single stream
- foreach (var operation in operations)
- {
- NetstringJsonSerializer.Encode(operation.JobOperation!.Value, stream, JobOperationJsonContext.Default.JobOperation);
- }
- stream.Position = 0;
- var result = await BlobClient.AppendBlockAsync(
- stream,
- new AppendBlobAppendBlockOptions { Conditions = new AppendBlobRequestConditions { IfMatch = ETag } },
- cancellationToken);
- ETag = result.Value.ETag;
- CommitedBlockCount = result.Value.BlobCommittedBlockCount;
-
- sw.Stop();
- LogBatchWritten(_logger, operations.Count, Id, sw.ElapsedMilliseconds, CommitedBlockCount);
-
- // Warn if approaching the 50,000 block limit (warn at 80%)
- if (CommitedBlockCount > 40000)
- {
- LogApproachingBlockLimit(_logger, Id, CommitedBlockCount);
- }
-
- // Warn if batch is unusually large
- if (operations.Count > _options.MaxBatchSize * 0.8)
- {
- LogLargeBatch(_logger, Id, operations.Count, _options.MaxBatchSize);
- }
- }
- finally
- {
- PooledBufferStream.Return(stream);
- }
- }
-
- private async Task UpdateMetadataAsync(IDictionary metadata, CancellationToken cancellationToken)
- {
- var result = await BlobClient.SetMetadataAsync(
- metadata,
- new BlobRequestConditions { IfMatch = ETag },
- cancellationToken);
- ETag = result.Value.ETag;
- Metadata = metadata;
- }
-
- ///
- /// Stops the background storage processor and waits for all pending operations to complete.
- /// After calling this method, no new storage operations can be enqueued.
- /// This method is idempotent and can be called multiple times safely.
- ///
- internal async Task StopProcessorAsync(CancellationToken cancellationToken)
- {
- LogStoppingProcessor(_logger, Id);
-
- // Complete the channel to stop accepting new operations (idempotent operation)
- if (_storageOperationChannel.Writer.TryComplete())
- {
- _shutdownCts.Cancel();
- }
-
- // Wait for the background processor to finish all pending operations
- try
- {
- await _storageProcessorTask.WaitAsync(cancellationToken);
- LogProcessorStopped(_logger, Id);
- }
- catch (OperationCanceledException)
- {
- // Expected during normal shutdown
- LogProcessorStopped(_logger, Id);
- }
- }
-
- public override async ValueTask DisposeAsync()
- {
- await StopProcessorAsync(CancellationToken.None);
- _shutdownCts.Dispose();
- await base.DisposeAsync();
- }
-}
-
-internal enum StorageOperationType
-{
- AppendJobOperation,
- UpdateMetadata
-}
-
-internal sealed class StorageOperation
-{
- public required StorageOperationType Type { get; init; }
- public JobOperation? JobOperation { get; init; }
- public IDictionary? Metadata { get; init; }
- public TaskCompletionSource CompletionSource { get; init; } = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
-
- public static StorageOperation CreateAppendOperation(JobOperation jobOperation)
- {
- return new StorageOperation
- {
- Type = StorageOperationType.AppendJobOperation,
- JobOperation = jobOperation
- };
- }
-
- public static StorageOperation CreateMetadataOperation(IDictionary metadata)
- {
- return new StorageOperation
- {
- Type = StorageOperationType.UpdateMetadata,
- Metadata = metadata
- };
- }
-}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShardManager.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShardManager.cs
deleted file mode 100644
index 4f1abd761b9..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/AzureStorageJobShardManager.cs
+++ /dev/null
@@ -1,517 +0,0 @@
-using System;
-using System.Collections.Concurrent;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Globalization;
-using System.Threading;
-using System.Threading.Tasks;
-using Azure;
-using Azure.Storage.Blobs;
-using Azure.Storage.Blobs.Models;
-using Azure.Storage.Blobs.Specialized;
-using Microsoft.Extensions.Logging;
-using Microsoft.Extensions.Options;
-using Orleans.Hosting;
-using Orleans.Runtime;
-
-namespace Orleans.DurableJobs.AzureStorage;
-
-public sealed partial class AzureStorageJobShardManager : JobShardManager
-{
- private readonly BlobServiceClient _blobServiceClient;
- private readonly string _containerName;
- private readonly string _blobPrefix;
- private BlobContainerClient _client = null!;
- private readonly IClusterMembershipService _clusterMembership;
- private readonly ConcurrentDictionary _jobShardCache = new();
- private readonly ILogger _logger;
- private readonly ILoggerFactory _loggerFactory;
- private readonly AzureStorageJobShardOptions _options;
- private readonly DurableJobsOptions _durableJobsOptions;
- private long _shardCounter = 0; // For generating unique shard IDs
-
- private const string AdoptedCountKey = "AdoptedCount";
- private const string LastAdoptedTimeKey = "LastAdoptedTime";
- private const string LegacyStolenCountKey = "StolenCount";
- private const string LegacyLastStolenTimeKey = "LastStolenTime";
-
- public AzureStorageJobShardManager(
- SiloAddress siloAddress,
- BlobServiceClient client,
- string containerName,
- string blobPrefix,
- AzureStorageJobShardOptions options,
- IOptions durableJobsOptions,
- IClusterMembershipService clusterMembership,
- ILoggerFactory loggerFactory)
- : base(siloAddress)
- {
- _blobServiceClient = client;
- _containerName = containerName;
- _blobPrefix = blobPrefix;
- _clusterMembership = clusterMembership;
- _logger = loggerFactory.CreateLogger();
- _loggerFactory = loggerFactory;
- _options = options;
- _durableJobsOptions = durableJobsOptions.Value;
- }
-
- public AzureStorageJobShardManager(
- ILocalSiloDetails localSiloDetails,
- IOptions options,
- IOptions durableJobsOptions,
- IClusterMembershipService clusterMembership,
- ILoggerFactory loggerFactory)
- : this(localSiloDetails.SiloAddress, options.Value.BlobServiceClient, options.Value.ContainerName, localSiloDetails.ClusterId, options.Value, durableJobsOptions, clusterMembership, loggerFactory)
- {
- }
-
- public override async Task> AssignJobShardsAsync(DateTimeOffset maxShardStartTime, int maxNewClaims, CancellationToken cancellationToken)
- {
- await InitializeIfNeeded(cancellationToken);
- LogAssigningShards(_logger, SiloAddress, maxShardStartTime, _containerName);
-
- var result = new List();
- var newClaimCount = 0;
- await foreach (var blob in _client.GetBlobsAsync(traits: BlobTraits.Metadata, states: BlobStates.None, cancellationToken: cancellationToken, prefix: _blobPrefix))
- {
- // Get the owner and creator of the shard
- var (owner, membershipVersion, shardStartTime, maxDueTime) = ParseMetadata(blob.Metadata);
-
- // Check if the membership version is more recent than our current version
- if (membershipVersion > _clusterMembership.CurrentSnapshot.Version)
- {
- // Refresh membership to at least that version
- await _clusterMembership.Refresh(membershipVersion, cancellationToken);
- }
-
- if (shardStartTime > maxShardStartTime)
- {
- // This shard is too new. Since blobs are returned in alphabetical order and our blob names
- // contain timestamps (yyyyMMddHHmm format), all subsequent blobs will also be too new.
- LogShardTooNew(_logger, blob.Name, shardStartTime, maxShardStartTime);
- break;
- }
-
- // If I am the owner, the shard must be in cache - always return it
- if (owner is not null && owner.Equals(SiloAddress))
- {
- if (_jobShardCache.TryGetValue(blob.Name, out var shard))
- {
- LogShardAssigned(_logger, blob.Name, SiloAddress);
- result.Add(shard);
- }
- else
- {
- // Shard is owned by us but not in cache - this is unexpected, release ownership
- Debug.Assert(false, $"Shard '{blob.Name}' is owned by this silo but not in cache - releasing ownership");
- await ReleaseOwnership(blob.Name);
- }
- continue;
- }
-
- // In debug, verify that if we're not the owner, the shard should not be in our cache
- Debug.Assert(!_jobShardCache.ContainsKey(blob.Name), $"Shard '{blob.Name}' is in cache but we are not the owner (owner: {owner?.ToParsableString() ?? "none"})");
-
- // Check if the owner is valid
- var ownerStatus = owner is not null ? _clusterMembership.CurrentSnapshot.GetSiloStatus(owner) : SiloStatus.None;
-
- if (ownerStatus is not SiloStatus.Dead and not SiloStatus.None)
- {
- // Owner is still active and it's not me, skip this shard
- LogShardStillOwned(_logger, blob.Name, owner!);
- continue;
- }
-
- // Determine if this is an adopted shard (taken from dead owner) vs orphaned (gracefully released)
- var isAdopted = owner is not null && ownerStatus == SiloStatus.Dead;
-
- // Respect the slow-start budget: skip claiming if we've exhausted the budget
- if (newClaimCount >= maxNewClaims)
- {
- continue;
- }
-
- // Try to claim orphaned or adopted shard
- LogClaimingShard(_logger, blob.Name, SiloAddress, owner);
- var blobClient = _client.GetAppendBlobClient(blob.Name);
- var metadata = blob.Metadata;
- var orphanedShard = new AzureStorageJobShard(blob.Name, shardStartTime, maxDueTime, blobClient, metadata, blob.Properties.ETag, _options, _loggerFactory.CreateLogger());
- if (!await TryTakeOwnership(orphanedShard, metadata, SiloAddress, isAdopted, cancellationToken))
- {
- // Either poisoned shard or someone else took ownership - dispose and continue
- await orphanedShard.DisposeAsync();
- continue;
- }
- await orphanedShard.InitializeAsync(cancellationToken);
- // We don't want to add new jobs to shards that we just took ownership of
- await orphanedShard.MarkAsCompleteAsync(cancellationToken);
- _jobShardCache[blob.Name] = orphanedShard;
- LogShardAssigned(_logger, blob.Name, SiloAddress);
- result.Add(orphanedShard);
- newClaimCount++;
- }
-
- LogAssignmentCompleted(_logger, result.Count, SiloAddress);
- return result;
-
- async Task ReleaseOwnership(string blobName)
- {
- try
- {
- var blobClient = _client.GetAppendBlobClient(blobName);
- var properties = await blobClient.GetPropertiesAsync(cancellationToken: cancellationToken);
- var metadata = properties.Value.Metadata;
- metadata.Remove("Owner");
- // Reset adopted count since we're gracefully releasing
- metadata.Remove(AdoptedCountKey);
- metadata.Remove(LastAdoptedTimeKey);
- metadata.Remove(LegacyStolenCountKey);
- metadata.Remove(LegacyLastStolenTimeKey);
- await blobClient.SetMetadataAsync(metadata, new BlobRequestConditions { IfMatch = properties.Value.ETag }, cancellationToken);
- }
- catch (Exception ex)
- {
- // Log but continue - we'll let another silo claim it
- LogWarningReleaseOwnershipNotInCache(_logger, ex, blobName);
- }
- }
-
- async Task TryTakeOwnership(AzureStorageJobShard shard, IDictionary metadata, SiloAddress newOwner, bool isAdopted, CancellationToken ct)
- {
- if (isAdopted)
- {
- var existingAdoptedCount = GetAdoptedCount(metadata);
- if (existingAdoptedCount > _durableJobsOptions.MaxAdoptedCount)
- {
- // Already marked as poisoned.
- return false;
- }
-
- // Increment adopted count for shards taken from dead owners.
- var adoptedCount = existingAdoptedCount + 1;
- if (adoptedCount > _durableJobsOptions.MaxAdoptedCount)
- {
- // Persist poisoned marker so this shard is not repeatedly re-evaluated as newly poisoned.
- SetAdoptedMetadata(metadata, adoptedCount, DateTimeOffset.UtcNow);
- try
- {
- await shard.UpdateBlobMetadata(metadata, ct);
- }
- catch (RequestFailedException ex)
- {
- LogOwnershipFailed(_logger, ex, shard.Id, newOwner);
- }
-
- LogPoisonedShardDetected(_logger, shard.Id, adoptedCount, _durableJobsOptions.MaxAdoptedCount);
- return false;
- }
-
- SetAdoptedMetadata(metadata, adoptedCount, DateTimeOffset.UtcNow);
- LogShardAdopted(_logger, shard.Id, newOwner, adoptedCount);
- }
-
- metadata["Owner"] = newOwner.ToParsableString();
- metadata["MembershipVersion"] = _clusterMembership.CurrentSnapshot.Version.Value.ToString();
-
- try
- {
- await shard.UpdateBlobMetadata(metadata, ct);
- LogOwnershipTaken(_logger, shard.Id, newOwner);
- return true;
- }
- catch (RequestFailedException ex)
- {
- // Someone else took over the shard
- LogOwnershipFailed(_logger, ex, shard.Id, newOwner);
- return false;
- }
- }
-
- static int GetAdoptedCount(IDictionary metadata)
- {
- if (metadata.TryGetValue(AdoptedCountKey, out var countStr)
- && int.TryParse(countStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var adoptedCount))
- {
- return adoptedCount;
- }
-
- return metadata.TryGetValue(LegacyStolenCountKey, out countStr)
- && int.TryParse(countStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var legacyCount)
- ? legacyCount
- : 0;
- }
-
- static void SetAdoptedMetadata(IDictionary metadata, int adoptedCount, DateTimeOffset adoptedTime)
- {
- metadata[AdoptedCountKey] = adoptedCount.ToString(CultureInfo.InvariantCulture);
- metadata[LastAdoptedTimeKey] = adoptedTime.ToString("o", CultureInfo.InvariantCulture);
- metadata.Remove(LegacyStolenCountKey);
- metadata.Remove(LegacyLastStolenTimeKey);
- }
- }
-
- public override async Task CreateShardAsync(DateTimeOffset minDueTime, DateTimeOffset maxDueTime, IDictionary metadata, CancellationToken cancellationToken)
- {
- await InitializeIfNeeded(cancellationToken);
- LogRegisteringShard(_logger, SiloAddress, minDueTime, maxDueTime, _containerName);
-
- var i = 0;
- while (true)
- {
- var counter = Interlocked.Increment(ref _shardCounter);
- var shardId = $"{_blobPrefix}-{minDueTime:yyyyMMddHHmm}-{SiloAddress.ToParsableString()}-{counter}";
- var blobClient = _client.GetAppendBlobClient(shardId);
- var metadataInfo = CreateMetadata(metadata, SiloAddress, _clusterMembership.CurrentSnapshot.Version, minDueTime, maxDueTime);
- metadataInfo["Owner"] = SiloAddress.ToParsableString();
- try
- {
- var response = await blobClient.CreateIfNotExistsAsync(metadata: metadataInfo, cancellationToken: cancellationToken);
- if (response == null)
- {
- // Blob already exists, try again with a different name
- LogShardIdCollision(_logger, shardId, i);
- continue;
- }
- }
- catch (RequestFailedException ex)
- {
- i++;
- if (i > _options.MaxBlobCreationRetries)
- {
- throw new InvalidOperationException($"Failed to create shard blob '{shardId}' after {i} attempts", ex);
- }
- // Blob already exists, try again with a different name
- LogShardRegistrationRetry(_logger, ex, shardId, i);
- continue;
- }
-
- var shard = new AzureStorageJobShard(shardId, minDueTime, maxDueTime, blobClient, metadataInfo, null, _options, _loggerFactory.CreateLogger());
- await shard.InitializeAsync(cancellationToken);
- _jobShardCache[shardId] = shard;
- LogShardRegistered(_logger, shardId, SiloAddress);
- return shard;
- }
- }
-
- public override async Task UnregisterShardAsync(Orleans.DurableJobs.IJobShard shard, CancellationToken cancellationToken)
- {
- var azureShard = shard as AzureStorageJobShard ?? throw new ArgumentException("Shard is not an AzureStorageJobShard", nameof(shard));
- LogUnregisteringShard(_logger, shard.Id, SiloAddress);
-
- // Stop the background storage processor to ensure no more changes can happen
- await azureShard.StopProcessorAsync(cancellationToken);
-
- // Now we can safely get a consistent view of the state
- var count = await shard.GetJobCountAsync();
- // We want to make sure to get the latest properties
- var properties = await azureShard.BlobClient.GetPropertiesAsync(cancellationToken: cancellationToken);
-
- // But we don't want to update the metadata if the ETag has changed
- var currentETag = properties.Value.ETag;
- var conditions = new BlobRequestConditions { IfMatch = currentETag };
- var metadata = properties.Value.Metadata;
- var (owner, _, _, _) = ParseMetadata(metadata);
-
- if (owner != SiloAddress)
- {
- LogUnregisterWrongOwner(_logger, shard.Id, SiloAddress, owner);
- throw new InvalidOperationException("Cannot unregister a shard owned by another silo");
- }
-
- if (count > 0)
- {
- // There are still jobs in the shard, release ownership gracefully.
- metadata.Remove("Owner");
- // Reset adopted count since we're gracefully releasing (not crashing)
- metadata.Remove(AdoptedCountKey);
- metadata.Remove(LastAdoptedTimeKey);
- metadata.Remove(LegacyStolenCountKey);
- metadata.Remove(LegacyLastStolenTimeKey);
- await azureShard.BlobClient.SetMetadataAsync(metadata, conditions, cancellationToken);
- _jobShardCache.TryRemove(shard.Id, out _);
- LogShardOwnershipReleased(_logger, shard.Id, SiloAddress, count);
- }
- else
- {
- // No jobs left, we can delete the shard
- await azureShard.BlobClient.DeleteIfExistsAsync(conditions: conditions, cancellationToken: cancellationToken);
- _jobShardCache.TryRemove(shard.Id, out _);
- LogShardDeleted(_logger, shard.Id, SiloAddress);
- }
-
- // Dispose the shard's resources
- await azureShard.DisposeAsync();
- }
-
- private async ValueTask InitializeIfNeeded(CancellationToken cancellationToken = default)
- {
- if (_client != null) return;
-
- LogInitializing(_logger, _containerName);
- _client = _blobServiceClient.GetBlobContainerClient(_containerName);
- await _client.CreateIfNotExistsAsync(cancellationToken: cancellationToken);
- LogInitialized(_logger, _containerName);
- }
-
- private static Dictionary CreateMetadata(IDictionary existingMetadata, SiloAddress siloAddress, MembershipVersion membershipVersion, DateTimeOffset minDueTime, DateTimeOffset maxDueTime)
- {
- var metadata = new Dictionary(existingMetadata)
- {
- { "MinDueTime", minDueTime.ToString("o") },
- { "MaxDueTime", maxDueTime.ToString("o") },
- { "MembershipVersion", membershipVersion.Value.ToString(CultureInfo.InvariantCulture) }
- };
-
- return metadata;
- }
-
- private static (SiloAddress? owner, MembershipVersion membershipVersion, DateTimeOffset minDueTime, DateTimeOffset maxDueTime) ParseMetadata(IDictionary metadata)
- {
- var owner = metadata.TryGetValue("Owner", out var ownerStr) ? SiloAddress.FromParsableString(ownerStr) : null;
- var membershipVersion = metadata.TryGetValue("MembershipVersion", out var membershipVersionStr) && long.TryParse(membershipVersionStr, out var versionValue)
- ? new MembershipVersion(versionValue)
- : MembershipVersion.MinValue;
- var minDueTime = metadata.TryGetValue("MinDueTime", out var minDueTimeStr) && DateTimeOffset.TryParse(minDueTimeStr, out var minDt) ? minDt : DateTimeOffset.MinValue;
- var maxDueTime = metadata.TryGetValue("MaxDueTime", out var maxDueTimeStr) && DateTimeOffset.TryParse(maxDueTimeStr, out var maxDt) ? maxDt : DateTimeOffset.MaxValue;
- return (owner, membershipVersion, minDueTime, maxDueTime);
- }
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Initializing Azure Storage Job Shard Manager with container '{ContainerName}'"
- )]
- private static partial void LogInitializing(ILogger logger, string containerName);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Azure Storage Job Shard Manager initialized successfully for container '{ContainerName}'"
- )]
- private static partial void LogInitialized(ILogger logger, string containerName);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Assigning job shards for silo {SiloAddress} with max time {MaxDateTime} from container '{ContainerName}'"
- )]
- private static partial void LogAssigningShards(ILogger logger, SiloAddress siloAddress, DateTimeOffset maxDateTime, string containerName);
-
- [LoggerMessage(
- Level = LogLevel.Trace,
- Message = "Ignoring shard '{ShardId}' since its start time is greater than specified maximum (MinDueTime={MinDueTime}, MaxDateTime={MaxDateTime})"
- )]
- private static partial void LogShardTooNew(ILogger logger, string shardId, DateTimeOffset minDueTime, DateTimeOffset maxDateTime);
-
- [LoggerMessage(
- Level = LogLevel.Trace,
- Message = "Shard '{ShardId}' is still owned by active silo {Owner}"
- )]
- private static partial void LogShardStillOwned(ILogger logger, string shardId, SiloAddress owner);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Reclaiming shard '{ShardId}' from cache for silo {SiloAddress}"
- )]
- private static partial void LogReclaimingShardFromCache(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Claiming shard '{ShardId}' for silo {SiloAddress} (Previous Owner={PreviousOwner})"
- )]
- private static partial void LogClaimingShard(ILogger logger, string shardId, SiloAddress siloAddress, SiloAddress? previousOwner);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Failed to take ownership of shard '{ShardId}' for silo {SiloAddress} due to conflict"
- )]
- private static partial void LogShardOwnershipConflict(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Shard '{ShardId}' assigned to silo {SiloAddress}"
- )]
- private static partial void LogShardAssigned(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Assigned {ShardCount} shard(s) to silo {SiloAddress}"
- )]
- private static partial void LogAssignmentCompleted(ILogger logger, int shardCount, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Debug,
- Message = "Took ownership of shard '{ShardId}' for silo {SiloAddress}"
- )]
- private static partial void LogOwnershipTaken(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Failed to take ownership of shard '{ShardId}' for silo {SiloAddress}"
- )]
- private static partial void LogOwnershipFailed(ILogger logger, Exception exception, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Failed to release ownership of shard '{ShardId}' that was not in cache"
- )]
- private static partial void LogWarningReleaseOwnershipNotInCache(ILogger logger, Exception exception, string shardId);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Creating new shard for silo {SiloAddress} (MinDueTime={MinDueTime}, MaxDueTime={MaxDueTime}) in container '{ContainerName}'"
- )]
- private static partial void LogRegisteringShard(ILogger logger, SiloAddress siloAddress, DateTimeOffset minDueTime, DateTimeOffset maxDueTime, string containerName);
-
- [LoggerMessage(
- Level = LogLevel.Trace,
- Message = "Shard ID collision for '{ShardId}' (attempt {Attempt}), retrying with new ID"
- )]
- private static partial void LogShardIdCollision(ILogger logger, string shardId, int attempt);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Failed to register shard '{ShardId}' (attempt {Attempt}), retrying"
- )]
- private static partial void LogShardRegistrationRetry(ILogger logger, Exception exception, string shardId, int attempt);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Shard '{ShardId}' created successfully for silo {SiloAddress}"
- )]
- private static partial void LogShardRegistered(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Unregistering shard '{ShardId}' for silo {SiloAddress}"
- )]
- private static partial void LogUnregisteringShard(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Cannot unregister shard '{ShardId}' - silo {SiloAddress} is not the owner (Owner={Owner})"
- )]
- private static partial void LogUnregisterWrongOwner(ILogger logger, string shardId, SiloAddress siloAddress, SiloAddress? owner);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Released ownership of shard '{ShardId}' by silo {SiloAddress} ({JobCount} jobs remaining)"
- )]
- private static partial void LogShardOwnershipReleased(ILogger logger, string shardId, SiloAddress siloAddress, int jobCount);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Deleted shard '{ShardId}' by silo {SiloAddress} (no jobs remaining)"
- )]
- private static partial void LogShardDeleted(ILogger logger, string shardId, SiloAddress siloAddress);
-
- [LoggerMessage(
- Level = LogLevel.Warning,
- Message = "Poisoned shard detected: '{ShardId}' has been adopted {AdoptedCount} times (max allowed: {MaxAdoptedCount}). Shard will not be assigned."
- )]
- private static partial void LogPoisonedShardDetected(ILogger logger, string shardId, int adoptedCount, int maxAdoptedCount);
-
- [LoggerMessage(
- Level = LogLevel.Information,
- Message = "Shard '{ShardId}' adopted by silo {SiloAddress} (adopted count: {AdoptedCount})"
- )]
- private static partial void LogShardAdopted(ILogger logger, string shardId, SiloAddress siloAddress, int adoptedCount);
-}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageDurableJobsExtensions.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageDurableJobsExtensions.cs
index ccb8d80fb4b..a204098d7c6 100644
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageDurableJobsExtensions.cs
+++ b/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageDurableJobsExtensions.cs
@@ -1,10 +1,11 @@
using System;
+using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
-using Microsoft.Extensions.Options;
-using Orleans.Configuration;
+using Microsoft.Extensions.DependencyInjection.Extensions;
using Orleans.Configuration.Internal;
using Orleans.DurableJobs;
-using Orleans.DurableJobs.AzureStorage;
+using Orleans.Journaling;
+using Orleans.Journaling.Json;
namespace Orleans.Hosting;
@@ -25,27 +26,15 @@ public static class AzureStorageDurableJobsExtensions
///
/// The provided , for chaining.
///
- public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, Action configure)
+ public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, Action configure)
{
- builder.ConfigureServices(services => services.UseAzureBlobDurableJobs(configure));
- return builder;
- }
+ ArgumentNullException.ThrowIfNull(builder);
+ ArgumentNullException.ThrowIfNull(configure);
- ///
- /// Adds durable jobs storage backed by Azure Blob Storage.
- ///
- ///
- /// The builder.
- ///
- ///
- /// The configuration delegate.
- ///
- ///
- /// The provided , for chaining.
- ///
- public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, Action> configureOptions)
- {
- builder.ConfigureServices(services => services.UseAzureBlobDurableJobs(configureOptions));
+ builder.AddDurableJobs();
+ builder.AddAzureBlobJournalStorage(configure);
+ builder.UseJsonJournalFormat(options => options.AddTypeInfoResolver(DurableJobsJsonContext.Default));
+ builder.Services.UseJournaledDurableJobs();
return builder;
}
@@ -61,36 +50,38 @@ public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, Ac
///
/// The provided , for chaining.
///
- public static IServiceCollection UseAzureBlobDurableJobs(this IServiceCollection services, Action configure)
+ public static IServiceCollection UseAzureBlobDurableJobs(this IServiceCollection services, Action configure)
{
+ ArgumentNullException.ThrowIfNull(services);
+ ArgumentNullException.ThrowIfNull(configure);
+
services.AddDurableJobs();
- services.AddSingleton();
- services.AddFromExisting();
- services.Configure(configure);
- services.ConfigureFormatter();
+
+ var builder = new ServiceCollectionSiloBuilder(services);
+ builder.AddAzureBlobJournalStorage(configure);
+ builder.UseJsonJournalFormat(options => options.AddTypeInfoResolver(DurableJobsJsonContext.Default));
+
+ services.UseJournaledDurableJobs();
return services;
}
- ///
- /// Adds durable jobs storage backed by Azure Blob Storage.
- ///
- ///
- /// The service collection.
- ///
- ///
- /// The configuration delegate.
- ///
- ///
- /// The provided , for chaining.
- ///
- public static IServiceCollection UseAzureBlobDurableJobs(this IServiceCollection services, Action> configureOptions)
+ private static IServiceCollection UseJournaledDurableJobs(this IServiceCollection services)
{
- services.AddDurableJobs();
- services.AddSingleton();
- services.AddFromExisting();
- configureOptions?.Invoke(services.AddOptions());
- services.ConfigureFormatter();
- services.AddTransient(sp => new AzureStorageJobShardOptionsValidator(sp.GetRequiredService>().Get(Options.DefaultName), Options.DefaultName));
+ services.TryAddSingleton();
+ services.AddFromExisting();
return services;
}
+
+ private sealed class ServiceCollectionSiloBuilder : ISiloBuilder
+ {
+ public ServiceCollectionSiloBuilder(IServiceCollection services)
+ {
+ Services = services;
+ Configuration = new ConfigurationBuilder().Build();
+ }
+
+ public IServiceCollection Services { get; }
+
+ public IConfiguration Configuration { get; }
+ }
}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageJobShardOptions.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageJobShardOptions.cs
deleted file mode 100644
index 5139b1cd071..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageJobShardOptions.cs
+++ /dev/null
@@ -1,42 +0,0 @@
-using System;
-using Azure.Storage.Blobs;
-
-namespace Orleans.Hosting;
-
-public class AzureStorageJobShardOptions
-{
- ///
- /// Gets or sets the instance used to store job shards.
- ///
- public BlobServiceClient BlobServiceClient { get; set; } = null!;
-
- ///
- /// Gets or sets the name of the container used to store durable jobs.
- ///
- public string ContainerName { get; set; } = "jobs";
-
- ///
- /// Gets or sets the maximum number of job operations to batch together in a single blob write.
- /// Default is 50 operations.
- ///
- public int MaxBatchSize { get; set; } = 50;
-
- ///
- /// Gets or sets the minimum number of job operations to batch together before flushing.
- /// If more than 1 then the we will wait for additional operations.
- /// Default is 1 operation (immediate flush, optimized for latency).
- ///
- public int MinBatchSize { get; set; } = 1;
-
- ///
- /// Gets or sets the maximum time to wait for additional operations if the minimum batch size isn't reached
- /// before flushing a batch.
- /// Default is 50 milliseconds.
- ///
- public TimeSpan BatchFlushInterval { get; set; } = TimeSpan.FromMilliseconds(50);
-
- ///
- /// Gets or sets the maximum number of retries for creating a blob for a job shard in case of name collisions.
- ///
- public int MaxBlobCreationRetries { get; internal set; } = 3;
-}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageJobShardOptionsValidator.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageJobShardOptionsValidator.cs
deleted file mode 100644
index 9fbf438b70c..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/Hosting/AzureStorageJobShardOptionsValidator.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-using Microsoft.Extensions.Options;
-using Orleans.Configuration.Internal;
-using Orleans.Runtime;
-
-namespace Orleans.Hosting;
-
-///
-/// Validates .
-///
-public class AzureStorageJobShardOptionsValidator : IConfigurationValidator
-{
- private readonly AzureStorageJobShardOptions _options;
- private readonly string _name;
-
- ///
- /// Initializes a new instance of the class.
- ///
- /// The options.
- /// The name.
- public AzureStorageJobShardOptionsValidator(AzureStorageJobShardOptions options, string name)
- {
- _options = options;
- _name = name;
- }
-
- ///
- public void ValidateConfiguration()
- {
- if (_options.BlobServiceClient is null)
- {
- throw new OrleansConfigurationException($"Invalid configuration for {nameof(AzureStorageJobShardOptions)} with name '{_name}'. {nameof(_options.BlobServiceClient)} is required.");
- }
-
- if (string.IsNullOrWhiteSpace(_options.ContainerName))
- {
- throw new OrleansConfigurationException($"Invalid configuration for {nameof(AzureStorageJobShardOptions)} with name '{_name}'. {nameof(_options.ContainerName)} is required.");
- }
- }
-}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/JobOperation.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/JobOperation.cs
deleted file mode 100644
index 834e858ada3..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/JobOperation.cs
+++ /dev/null
@@ -1,110 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Text.Json;
-using System.Text.Json.Serialization;
-using Orleans.Runtime;
-
-namespace Orleans.DurableJobs.AzureStorage;
-
-///
-/// Represents an operation to be performed on a durable job.
-///
-internal struct JobOperation
-{
- ///
- /// The type of operation to perform.
- ///
- public enum OperationType
- {
- Add,
- Remove,
- Retry,
- }
-
- ///
- /// Gets or sets the type of operation.
- ///
- public OperationType Type { get; init; }
-
- ///
- /// Gets or sets the job identifier.
- ///
- public string Id { get; init; }
-
- ///
- /// Gets or sets the job name (only used for Add operations).
- ///
- public string? Name { get; init; }
-
- ///
- /// Gets or sets the due time (used for Add and Retry operations).
- ///
- public DateTimeOffset? DueTime { get; init; }
-
- ///
- /// Gets or sets the target grain ID (only used for Add operations).
- ///
- public GrainId? TargetGrainId { get; init; }
-
- ///
- /// Gets or sets the job metadata (only used for Add operations).
- ///
- public IReadOnlyDictionary? Metadata { get; init; }
-
- ///
- /// Creates an Add operation for scheduling a new job.
- ///
- /// The job identifier.
- /// The job name.
- /// The job due time.
- /// The target grain ID.
- /// The job metadata.
- /// A new JobOperation for adding a job.
- /// Thrown when or is null or empty.
- public static JobOperation CreateAddOperation(string id, string name, DateTimeOffset dueTime, GrainId targetGrainId, IReadOnlyDictionary? metadata)
- {
- ArgumentException.ThrowIfNullOrEmpty(id);
- ArgumentException.ThrowIfNullOrEmpty(name);
-
- return new() { Type = OperationType.Add, Id = id, Name = name, DueTime = dueTime, TargetGrainId = targetGrainId, Metadata = metadata };
- }
-
- ///
- /// Creates a Remove operation for canceling a job.
- ///
- /// The job identifier.
- /// A new JobOperation for removing a job.
- /// Thrown when is null or empty.
- public static JobOperation CreateRemoveOperation(string id)
- {
- ArgumentException.ThrowIfNullOrEmpty(id);
-
- return new() { Type = OperationType.Remove, Id = id };
- }
-
- ///
- /// Creates a Retry operation for rescheduling a job.
- ///
- /// The job identifier.
- /// The new due time.
- /// A new JobOperation for retrying a job.
- /// Thrown when is null or empty.
- public static JobOperation CreateRetryOperation(string id, DateTimeOffset dueTime)
- {
- ArgumentException.ThrowIfNullOrEmpty(id);
-
- return new() { Type = OperationType.Retry, Id = id, DueTime = dueTime };
- }
-}
-
-///
-/// JSON serialization context for JobOperation with compile-time source generation.
-///
-[JsonSerializable(typeof(JobOperation))]
-[JsonSourceGenerationOptions(
- DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
- PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase,
- WriteIndented = false)]
-internal partial class JobOperationJsonContext : JsonSerializerContext
-{
-}
\ No newline at end of file
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/NetstringJsonSerializer.cs b/src/Azure/Orleans.DurableJobs.AzureStorage/NetstringJsonSerializer.cs
deleted file mode 100644
index f47575c2162..00000000000
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/NetstringJsonSerializer.cs
+++ /dev/null
@@ -1,168 +0,0 @@
-using System;
-using System.Buffers;
-using System.Buffers.Text;
-using System.Collections.Generic;
-using System.IO;
-using System.Runtime.CompilerServices;
-using System.Text.Json;
-using System.Text.Json.Serialization.Metadata;
-using System.Threading;
-using System.Threading.Tasks;
-using Orleans.Serialization.Buffers.Adaptors;
-
-namespace Orleans.DurableJobs.AzureStorage;
-
-///
-/// Provides methods for serializing and deserializing JSON data using the netstring format.
-/// Netstrings are a simple, self-delimiting way to encode data with length prefixes.
-/// Format: [6 hex digits]:[data]\n
-/// Maximum data size is 10MB (0xA00000 bytes).
-///
-public static class NetstringJsonSerializer
-{
- private const int MaxLength = 0xA00000; // 10MB
-
- ///
- /// Encodes an object as a netstring by serializing it to JSON and writing directly to a stream.
- ///
- /// The object to encode.
- /// The stream to write the netstring-encoded data to.
- /// The JSON type info for serialization.
- /// Thrown when the serialized data exceeds the maximum length.
- public static void Encode(T value, Stream stream, JsonTypeInfo jsonTypeInfo)
- {
- // Remember starting position
- var startPosition = stream.Position;
-
- // Skip past where the length prefix will go (6 hex digits + colon)
- Span lengthBytes = stackalloc byte[7];
- stream.Write(lengthBytes);
-
- // Remember position where data starts
- var dataStartPosition = stream.Position;
-
- // Serialize JSON directly to stream
- using (var writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = false }))
- {
- JsonSerializer.Serialize(writer, value, jsonTypeInfo);
- }
-
- stream.Flush();
-
- // Calculate JSON length
- var jsonLength = (int)(stream.Position - dataStartPosition);
-
- if (jsonLength > MaxLength)
- {
- throw new InvalidOperationException($"Serialized data exceeds maximum length of {MaxLength} bytes");
- }
-
- // Write trailing newline
- stream.WriteByte((byte)'\n');
-
- // Remember end position
- var endPosition = stream.Position;
-
- // Seek back to write the length prefix
- stream.Position = startPosition;
-
- // Format length as 6-digit hex and write directly
- if (!Utf8Formatter.TryFormat(jsonLength, lengthBytes, out _, new StandardFormat('X', 6)))
- {
- throw new InvalidOperationException("Failed to format length prefix");
- }
-
- lengthBytes[6] = (byte)':';
-
- stream.Write(lengthBytes);
-
- // Restore position to end
- stream.Position = endPosition;
- }
-
- ///
- /// Reads netstring-encoded JSON objects from a stream and deserializes them.
- ///
- /// The stream to read from.
- /// The JSON type info for deserialization.
- /// The cancellation token to cancel the operation.
- /// An async enumerable of deserialized objects.
- /// Thrown when the stream contains invalid netstring data.
- public static async IAsyncEnumerable DecodeAsync(Stream stream, JsonTypeInfo jsonTypeInfo, [EnumeratorCancellation] CancellationToken cancellationToken)
- {
- const int TypicalBufferSize = 4096; // 4KB
- var buffer = ArrayPool.Shared.Rent(TypicalBufferSize);
-
- try
- {
- while (true)
- {
-
- // Try to read length prefix (6 hex digits + colon)
- try
- {
- await stream.ReadExactlyAsync(buffer, 0, 7, cancellationToken);
- }
- catch (EndOfStreamException)
- {
- // We are done
- yield break;
- }
-
- // Verify colon
- if (buffer[6] != ':')
- {
- throw new InvalidDataException($"Expected colon at position 6, got byte value {buffer[6]}");
- }
-
- // Parse length as hex
- if (!Utf8Parser.TryParse(buffer.AsSpan(0, 6), out int length, out _, 'X'))
- {
- throw new InvalidDataException($"Invalid netstring length: {System.Text.Encoding.UTF8.GetString(buffer, 0, 6)}");
- }
-
- if (length < 0 || length > MaxLength)
- {
- throw new InvalidDataException($"Netstring length out of valid range: {length}");
- }
-
- // Ensure buffer is large enough for the data + newline
- var totalLength = length + 1;
- if (buffer.Length < totalLength)
- {
- ArrayPool.Shared.Return(buffer);
- buffer = ArrayPool.Shared.Rent(totalLength);
- }
-
- // Read data + trailing newline
- try
- {
- await stream.ReadExactlyAsync(buffer.AsMemory(0, totalLength), cancellationToken);
- }
- catch (EndOfStreamException ex)
- {
- throw new InvalidDataException("Unexpected end of stream while reading netstring data", ex);
- }
-
- // Verify trailing newline
- if (buffer[length] != '\n')
- {
- throw new InvalidDataException($"Expected newline at end of netstring, got byte value {buffer[length]}");
- }
-
- // Deserialize JSON directly from UTF-8 bytes
- var result = JsonSerializer.Deserialize(buffer.AsSpan(0, length), jsonTypeInfo);
- if (result is null)
- {
- throw new JsonException("Deserialized JSON resulted in null value");
- }
-
- yield return result;
- }
- }
- finally
- {
- ArrayPool.Shared.Return(buffer);
- }
- }
-}
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.csproj b/src/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.csproj
index cf1848b99ed..a2990fa9144 100644
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.csproj
+++ b/src/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.csproj
@@ -12,6 +12,7 @@
true
$(DefineConstants)
enable
+ $(NoWarn);ORLEANSEXP005
$(VersionSuffix).alpha.1
alpha.1
@@ -19,8 +20,7 @@
-
-
+
diff --git a/src/Azure/Orleans.DurableJobs.AzureStorage/README.md b/src/Azure/Orleans.DurableJobs.AzureStorage/README.md
index 29757f6b3c5..6c2557efbe2 100644
--- a/src/Azure/Orleans.DurableJobs.AzureStorage/README.md
+++ b/src/Azure/Orleans.DurableJobs.AzureStorage/README.md
@@ -27,13 +27,10 @@ builder.UseOrleans(siloBuilder =>
{
siloBuilder
.UseAzureStorageClustering(options => options.ConfigureTableServiceClient("YOUR_STORAGE_ACCOUNT_URI"))
- .UseAzureStorageDurableJobs(options =>
+ .UseAzureBlobDurableJobs(options =>
{
- options.Configure(o =>
- {
- o.BlobServiceClient = new BlobServiceClient("YOUR_AZURE_STORAGE_CONNECTION_STRING");
- o.ContainerName = "durable-jobs";
- });
+ options.BlobServiceClient = new BlobServiceClient("YOUR_AZURE_STORAGE_CONNECTION_STRING");
+ options.ContainerName = "durable-jobs";
});
});
@@ -53,16 +50,13 @@ builder.UseOrleans(siloBuilder =>
{
siloBuilder
.UseAzureStorageClustering(options => options.ConfigureTableServiceClient("YOUR_STORAGE_ACCOUNT_URI"))
- .UseAzureStorageDurableJobs(options =>
+ .UseAzureBlobDurableJobs(options =>
{
- options.Configure(o =>
- {
- var credential = new DefaultAzureCredential();
- o.BlobServiceClient = new BlobServiceClient(
- new Uri("https://youraccount.blob.core.windows.net"),
- credential);
- o.ContainerName = "durable-jobs";
- });
+ var credential = new DefaultAzureCredential();
+ options.BlobServiceClient = new BlobServiceClient(
+ new Uri("https://youraccount.blob.core.windows.net"),
+ credential);
+ options.ContainerName = "durable-jobs";
});
});
@@ -78,14 +72,11 @@ builder.UseOrleans(siloBuilder =>
{
siloBuilder
.UseAzureStorageClustering(options => options.ConfigureTableServiceClient(connectionString))
- .UseAzureStorageDurableJobs(options =>
+ .UseAzureBlobDurableJobs(options =>
{
- options.Configure(o =>
- {
- o.BlobServiceClient = new BlobServiceClient(connectionString);
- // Use different containers for different environments
- o.ContainerName = $"durable-jobs-{Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT")?.ToLowerInvariant()}";
- });
+ options.BlobServiceClient = new BlobServiceClient(connectionString);
+ // Use different containers for different environments
+ options.ContainerName = $"durable-jobs-{Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT")?.ToLowerInvariant()}";
})
.ConfigureServices(services =>
{
diff --git a/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorage.cs b/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorage.cs
index 3c29b0dc8be..ccdd4d6c360 100644
--- a/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorage.cs
+++ b/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorage.cs
@@ -57,6 +57,91 @@ internal AzureBlobJournalStorage(
_walClient = GetWalClient();
}
+ public async ValueTask CreateIfNotExistsAsync(
+ IReadOnlyDictionary? metadata = null,
+ CancellationToken cancellationToken = default)
+ {
+ var callerMetadata = CopyAndValidateCallerMetadata(metadata);
+ try
+ {
+ var response = await CreateWalAsync(
+ checkpointName: null,
+ new AppendBlobRequestConditions { IfNoneMatch = ETag.All },
+ cancellationToken,
+ callerMetadata).ConfigureAwait(false);
+ SetWal(response.Value.ETag, blockCount: 0);
+ return true;
+ }
+ catch (RequestFailedException exception) when (exception.Status is 409 or 412)
+ {
+ return false;
+ }
+ }
+
+ public async ValueTask GetMetadataAsync(CancellationToken cancellationToken = default)
+ {
+ var properties = await GetPropertiesCoreAsync(_walClient, conditions: null, cancellationToken).ConfigureAwait(false);
+ return properties is null || properties.BlobType != BlobType.Append
+ ? null
+ : CreateJournalMetadata(properties.ETag, properties.Metadata);
+ }
+
+ public async ValueTask UpdateMetadataAsync(
+ IReadOnlyDictionary? set = null,
+ IEnumerable? remove = null,
+ string? expectedETag = null,
+ CancellationToken cancellationToken = default)
+ {
+ var setValues = CopyAndValidateCallerMetadata(set);
+ var removeValues = CopyRemove(remove, setValues);
+ for (var attempt = 0; attempt < 3; attempt++)
+ {
+ BlobProperties? properties;
+ try
+ {
+ properties = await GetPropertiesCoreAsync(
+ _walClient,
+ expectedETag is null ? null : new BlobRequestConditions { IfMatch = ToAzureETag(expectedETag) },
+ cancellationToken).ConfigureAwait(false);
+ }
+ catch (RequestFailedException exception) when (exception.Status is 412)
+ {
+ return null;
+ }
+
+ if (properties is null || properties.BlobType != BlobType.Append)
+ {
+ return null;
+ }
+
+ var metadata = CopyMetadata(properties.Metadata);
+ if (!ApplyCallerMetadataUpdate(metadata, setValues, removeValues))
+ {
+ return CreateJournalMetadata(properties.ETag, metadata);
+ }
+
+ var conditions = new BlobRequestConditions
+ {
+ IfMatch = expectedETag is null ? properties.ETag : ToAzureETag(expectedETag),
+ };
+
+ try
+ {
+ var response = await _walClient.SetMetadataAsync(metadata, conditions, cancellationToken).ConfigureAwait(false);
+ return CreateJournalMetadata(response.Value.ETag, metadata);
+ }
+ catch (RequestFailedException exception) when (exception.Status is 412)
+ {
+ if (expectedETag is not null)
+ {
+ return null;
+ }
+ }
+ }
+
+ return null;
+ }
+
public async ValueTask AppendAsync(ReadOnlySequence value, CancellationToken cancellationToken)
{
// Appends are written as one Azure append block, so validate blob limits before touching storage.
@@ -220,8 +305,8 @@ public async ValueTask ReadAsync(IJournalStorageConsumer consumer, CancellationT
var walMetadata = manifest.Metadata.Format is { Length: > 0 }
? manifest.Metadata
: expectedFormat is { Length: > 0 }
- ? new JournalFileMetadata(expectedFormat)
- : JournalFileMetadata.Empty;
+ ? new JournalMetadata(expectedFormat)
+ : JournalMetadata.Empty;
var totalWalBytes = await consumer.ReadAsync(
walStream,
walMetadata,
@@ -239,33 +324,29 @@ public async ValueTask ReplaceAsync(ReadOnlySequence value, CancellationTo
await EnsureWalAsync(cancellationToken).ConfigureAwait(false);
var expectedWalETag = _walETag;
- string? previousCheckpointName = null;
- if (_shared.Options.DeleteOldCheckpoints)
+ WalState? walState;
+ try
{
- // Read the WAL manifest only when cleanup needs the previous checkpoint name, and require the cached ETag to still match.
- WalState? walState;
- try
- {
- walState = await TryLoadWalStateAsync(new BlobRequestConditions { IfMatch = expectedWalETag }, cancellationToken).ConfigureAwait(false);
-
- if (walState is null)
- {
- throw CreateInconsistentWalStateException(
- "Azure Blob journal WAL changed while publishing a checkpoint; recovery is required.",
- expectedWalETag);
- }
- }
- catch (RequestFailedException exception) when (IsWalMutationConflict(exception))
+ // Read the WAL manifest so compaction preserves caller-owned metadata while replacing provider-owned checkpoint metadata.
+ walState = await TryLoadWalStateAsync(new BlobRequestConditions { IfMatch = expectedWalETag }, cancellationToken).ConfigureAwait(false);
+ if (walState is null)
{
throw CreateInconsistentWalStateException(
"Azure Blob journal WAL changed while publishing a checkpoint; recovery is required.",
- expectedWalETag,
- exception);
+ expectedWalETag);
}
-
- expectedWalETag = walState.Value.ETag;
- previousCheckpointName = walState.Value.Manifest.Checkpoint?.Name;
}
+ catch (RequestFailedException exception) when (IsWalMutationConflict(exception))
+ {
+ throw CreateInconsistentWalStateException(
+ "Azure Blob journal WAL changed while publishing a checkpoint; recovery is required.",
+ expectedWalETag,
+ exception);
+ }
+
+ expectedWalETag = walState.Value.ETag;
+ var previousCheckpointName = _shared.Options.DeleteOldCheckpoints ? walState.Value.Manifest.Checkpoint?.Name : null;
+ var callerMetadata = walState.Value.Manifest.Metadata.Properties;
using var checkpointStream = new ReadOnlySequenceStream(value);
while (true)
@@ -299,7 +380,8 @@ await checkpointClient.UploadAsync(
var result = await CreateWalAsync(
checkpointName,
new AppendBlobRequestConditions { IfMatch = expectedWalETag },
- cancellationToken).ConfigureAwait(false);
+ cancellationToken,
+ callerMetadata).ConfigureAwait(false);
SetWal(result.Value.ETag, blockCount: 0);
}
catch (RequestFailedException exception) when (IsWalMutationConflict(exception))
@@ -412,7 +494,8 @@ private AppendBlobClient GetWalClient()
private async ValueTask> CreateWalAsync(
string? checkpointName,
AppendBlobRequestConditions conditions,
- CancellationToken cancellationToken)
+ CancellationToken cancellationToken,
+ IReadOnlyDictionary? callerMetadata = null)
{
// Creating an append blob is also how compaction publishes a fresh WAL manifest.
return await _walClient.CreateAsync(
@@ -420,7 +503,7 @@ private async ValueTask> CreateWalAsync(
{
Conditions = conditions,
HttpHeaders = CreateHttpHeaders(_shared.MimeType),
- Metadata = CreateWalMetadata(checkpointName, checkpointOffset: 0),
+ Metadata = CreateWalMetadata(checkpointName, checkpointOffset: 0, callerMetadata),
},
cancellationToken).ConfigureAwait(false);
}
@@ -478,10 +561,22 @@ private Dictionary CreateMetadataDictionary()
private Dictionary CreateCheckpointBlobMetadata() => CreateMetadataDictionary();
- private Dictionary CreateWalMetadata(string? checkpointName, long checkpointOffset)
+ private Dictionary CreateWalMetadata(
+ string? checkpointName,
+ long checkpointOffset,
+ IReadOnlyDictionary? callerMetadata = null)
{
// WAL metadata is the recovery manifest: common format plus optional checkpoint pointer and WAL offset.
var metadata = CreateMetadataDictionary();
+ if (callerMetadata is not null)
+ {
+ foreach (var (key, value) in callerMetadata)
+ {
+ ValidateCallerMetadataProperty(key, value);
+ metadata[key] = value;
+ }
+ }
+
if (checkpointName is not null)
{
metadata[CheckpointMetadataKey] = checkpointName;
@@ -501,9 +596,7 @@ private Dictionary CreateWalMetadata(string? checkpointName, lon
private static WalManifest CreateWalManifest(IDictionary? metadata)
{
// Decode the WAL manifest, accepting non-compacted WALs that have no checkpoint pointer.
- var fileMetadata = GetFormatKeyMetadata(metadata) is { } format
- ? new JournalFileMetadata(format)
- : JournalFileMetadata.Empty;
+ var fileMetadata = CreateJournalMetadata(eTag: default, metadata);
if (metadata is null || !metadata.TryGetValue(CheckpointMetadataKey, out var checkpointName) || checkpointName is not { Length: > 0 })
{
return new WalManifest(fileMetadata, Checkpoint: null);
@@ -521,7 +614,7 @@ private static WalManifest CreateWalManifest(IDictionary? metada
return new WalManifest(fileMetadata, new CheckpointReference(checkpointName, checkpointOffset));
}
- private static IJournalFileMetadata ValidateCheckpointMetadata(CheckpointReference checkpoint, BlobDownloadDetails checkpointDetails, string? expectedFormat)
+ private static IJournalMetadata ValidateCheckpointMetadata(CheckpointReference checkpoint, BlobDownloadDetails checkpointDetails, string? expectedFormat)
{
// Refuse to stitch checkpoint and WAL data together if their declared journal formats differ.
var checkpointBlobFormat = GetFormatKeyMetadata(checkpointDetails.Metadata);
@@ -540,9 +633,152 @@ private static IJournalFileMetadata ValidateCheckpointMetadata(CheckpointReferen
}
}
- return checkpointBlobFormat is { } format
- ? new JournalFileMetadata(format)
- : JournalFileMetadata.Empty;
+ return CreateJournalMetadata(eTag: default, checkpointDetails.Metadata);
+ }
+
+ private static async ValueTask GetPropertiesCoreAsync(
+ AppendBlobClient blobClient,
+ BlobRequestConditions? conditions,
+ CancellationToken cancellationToken)
+ {
+ try
+ {
+ var response = await blobClient.GetPropertiesAsync(conditions, cancellationToken).ConfigureAwait(false);
+ return response.Value;
+ }
+ catch (RequestFailedException exception) when (exception.Status is 404)
+ {
+ return null;
+ }
+ }
+
+ private static IJournalMetadata CreateJournalMetadata(ETag eTag, IDictionary? metadata)
+ => new JournalMetadata(
+ GetFormatKeyMetadata(metadata),
+ eTag == default ? null : eTag.ToString(),
+ CopyCallerMetadata(metadata));
+
+ private static Dictionary CopyCallerMetadata(IDictionary? metadata)
+ {
+ var result = new Dictionary(StringComparer.Ordinal);
+ if (metadata is null)
+ {
+ return result;
+ }
+
+ foreach (var (key, value) in metadata)
+ {
+ if (IsProviderMetadataKey(key))
+ {
+ continue;
+ }
+
+ result[key] = value;
+ }
+
+ return result;
+ }
+
+ private static Dictionary CopyAndValidateCallerMetadata(IReadOnlyDictionary? metadata)
+ {
+ var result = new Dictionary(StringComparer.Ordinal);
+ if (metadata is null)
+ {
+ return result;
+ }
+
+ foreach (var (key, value) in metadata)
+ {
+ ValidateCallerMetadataProperty(key, value);
+ result.Add(key, value);
+ }
+
+ return result;
+ }
+
+ private static Dictionary CopyMetadata(IDictionary? metadata)
+ => metadata is null
+ ? new Dictionary(StringComparer.OrdinalIgnoreCase)
+ : new Dictionary(metadata, StringComparer.OrdinalIgnoreCase);
+
+ private static IReadOnlySet CopyRemove(IEnumerable? remove, IReadOnlyDictionary set)
+ {
+ if (remove is null)
+ {
+ return new HashSet(StringComparer.Ordinal);
+ }
+
+ var result = new HashSet(StringComparer.Ordinal);
+ foreach (var propertyName in remove)
+ {
+ ValidateCallerMetadataPropertyName(propertyName);
+ if (set.ContainsKey(propertyName))
+ {
+ throw new ArgumentException($"Journal metadata property '{propertyName}' cannot be both set and removed.", nameof(remove));
+ }
+
+ result.Add(propertyName);
+ }
+
+ return result;
+ }
+
+ private static bool ApplyCallerMetadataUpdate(
+ Dictionary metadata,
+ IReadOnlyDictionary set,
+ IReadOnlySet remove)
+ {
+ var changed = false;
+ foreach (var propertyName in remove)
+ {
+ ValidateCallerMetadataPropertyName(propertyName);
+ changed |= metadata.Remove(propertyName);
+ }
+
+ foreach (var (propertyName, value) in set)
+ {
+ ValidateCallerMetadataProperty(propertyName, value);
+ if (!metadata.TryGetValue(propertyName, out var currentValue)
+ || !string.Equals(currentValue, value, StringComparison.Ordinal))
+ {
+ metadata[propertyName] = value;
+ changed = true;
+ }
+ }
+
+ return changed;
+ }
+
+ private static void ValidateCallerMetadataProperty(string key, string value)
+ {
+ ValidateCallerMetadataPropertyName(key);
+ ArgumentNullException.ThrowIfNull(value);
+ }
+
+ private static void ValidateCallerMetadataPropertyName(string key)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(key);
+ if (key.IndexOf('\0') >= 0)
+ {
+ throw new ArgumentException("Journal metadata property names must not contain null characters.", nameof(key));
+ }
+
+ if (IsProviderMetadataKey(key))
+ {
+ throw new ArgumentException($"Journal metadata property '{key}' is provider-owned.", nameof(key));
+ }
+ }
+
+ private static bool IsProviderMetadataKey(string key)
+ => string.Equals(key, FormatMetadataKey, StringComparison.OrdinalIgnoreCase)
+ || string.Equals(key, CheckpointMetadataKey, StringComparison.OrdinalIgnoreCase)
+ || string.Equals(key, CheckpointOffsetMetadataKey, StringComparison.OrdinalIgnoreCase)
+ || key.StartsWith("$", StringComparison.Ordinal);
+
+ private static ETag ToAzureETag(string eTag)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(eTag);
+ return new ETag(eTag);
}
private static bool IsBlobSealed(RequestFailedException exception)
@@ -590,7 +826,7 @@ private static InconsistentStateException CreateInconsistentWalStateException(st
Message = "Failed to delete obsolete Azure Blob journal checkpoint \"{ContainerName}/{BlobName}\"")]
private static partial void LogCheckpointCleanupFailure(ILogger logger, string containerName, string blobName, Exception exception);
- private sealed record WalManifest(IJournalFileMetadata Metadata, CheckpointReference? Checkpoint);
+ private sealed record WalManifest(IJournalMetadata Metadata, CheckpointReference? Checkpoint);
private readonly record struct WalState(ETag ETag, WalManifest Manifest);
diff --git a/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorageProvider.cs b/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorageProvider.cs
index 0a08b3051a2..615f13763a0 100644
--- a/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorageProvider.cs
+++ b/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobJournalStorageProvider.cs
@@ -1,14 +1,21 @@
+using System.Runtime.CompilerServices;
+using Azure;
+using Azure.Storage.Blobs;
+using Azure.Storage.Blobs.Models;
+using Azure.Storage.Blobs.Specialized;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
+using Orleans.Runtime;
namespace Orleans.Journaling;
-internal sealed class AzureBlobJournalStorageProvider : ILifecycleParticipant, IJournalStorageProvider
+internal sealed class AzureBlobJournalStorageProvider : ILifecycleParticipant, IJournalStorageProvider, IJournalStorageCatalog
{
private readonly IBlobContainerFactory _containerFactory;
private readonly AzureBlobJournalStorageOptions _options;
private readonly AzureBlobJournalStorage.AzureBlobJournalStorageShared _shared;
+ private BlobContainerClient? _defaultContainer;
public AzureBlobJournalStorageProvider(
IOptions options,
@@ -31,6 +38,8 @@ public AzureBlobJournalStorageProvider(
private async Task Initialize(CancellationToken cancellationToken)
{
var client = await _options.CreateClient!(cancellationToken);
+ _defaultContainer = client.GetBlobContainerClient(_options.ContainerName);
+ await _defaultContainer.CreateIfNotExistsAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
await _containerFactory.InitializeAsync(client, cancellationToken).ConfigureAwait(false);
}
@@ -44,6 +53,43 @@ public IJournalStorage CreateStorage(JournalId journalId)
return new AzureBlobJournalStorage(_shared, journalId);
}
+ public async IAsyncEnumerable ListAsync(
+ JournalId prefix = default,
+ [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ var container = GetDefaultContainerClient();
+ var blobPrefix = prefix.IsDefault ? null : prefix.Value;
+ var journalIds = new List();
+ await foreach (var item in container.GetBlobsAsync(
+ traits: BlobTraits.None,
+ states: BlobStates.None,
+ prefix: blobPrefix,
+ cancellationToken: cancellationToken))
+ {
+ if (item.Properties.BlobType is { } blobType && blobType != BlobType.Append)
+ {
+ continue;
+ }
+
+ if (!item.Name.EndsWith("/wal", StringComparison.Ordinal))
+ {
+ continue;
+ }
+
+ var storageIdValue = item.Name[..^"/wal".Length];
+ if (TryParseJournalId(storageIdValue, out var journalId) && prefix.IsPrefixOf(journalId))
+ {
+ journalIds.Add(journalId);
+ }
+ }
+
+ foreach (var journalId in journalIds.OrderBy(static journalId => journalId.Value, StringComparer.Ordinal))
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ yield return journalId;
+ }
+ }
+
public void Participate(ISiloLifecycle observer)
{
observer.Subscribe(
@@ -52,6 +98,24 @@ public void Participate(ISiloLifecycle observer)
onStart: Initialize);
}
+ private BlobContainerClient GetDefaultContainerClient()
+ => _defaultContainer ?? throw new InvalidOperationException(
+ $"{nameof(AzureBlobJournalStorageProvider)} has not been initialized. Ensure the silo lifecycle has started before using journal storage.");
+
+ private static bool TryParseJournalId(string value, out JournalId journalId)
+ {
+ try
+ {
+ journalId = new JournalId(value);
+ return true;
+ }
+ catch (ArgumentException)
+ {
+ journalId = default;
+ return false;
+ }
+ }
+
private static IJournalFormat GetJournalFormat(IServiceProvider serviceProvider, string journalFormatKey)
{
var journalFormat = serviceProvider.GetKeyedService(journalFormatKey);
diff --git a/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobStorageHostingExtensions.cs b/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobStorageHostingExtensions.cs
index 9bf1088bf7c..a5f0702e370 100644
--- a/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobStorageHostingExtensions.cs
+++ b/src/Azure/Orleans.Journaling.AzureStorage/AzureBlobStorageHostingExtensions.cs
@@ -22,6 +22,7 @@ public static ISiloBuilder AddAzureBlobJournalStorage(this ISiloBuilder builder,
{
builder.Services.AddSingleton();
builder.Services.AddFromExisting();
+ builder.Services.AddFromExisting();
builder.Services.AddFromExisting, AzureBlobJournalStorageProvider>();
}
return builder;
diff --git a/src/Orleans.DurableJobs/DurableJobsJsonContext.cs b/src/Orleans.DurableJobs/DurableJobsJsonContext.cs
new file mode 100644
index 00000000000..cb97b4bb2e5
--- /dev/null
+++ b/src/Orleans.DurableJobs/DurableJobsJsonContext.cs
@@ -0,0 +1,19 @@
+using System;
+using System.Collections.Generic;
+using System.Text.Json.Serialization;
+
+namespace Orleans.DurableJobs;
+
+[JsonSerializable(typeof(DurableJob))]
+[JsonSerializable(typeof(DurableJobShardJournalRecord))]
+[JsonSerializable(typeof(DurableJobShardRemoveOperation))]
+[JsonSerializable(typeof(DurableJobShardRetryOperation))]
+[JsonSerializable(typeof(DurableJobShardScheduleOperation))]
+[JsonSerializable(typeof(DurableJobShardSnapshot))]
+[JsonSerializable(typeof(DurableJobShardSnapshotEntry))]
+[JsonSerializable(typeof(Dictionary))]
+[JsonSerializable(typeof(DateTime))]
+[JsonSerializable(typeof(string))]
+[JsonSerializable(typeof(uint))]
+[JsonSerializable(typeof(ulong))]
+internal sealed partial class DurableJobsJsonContext : JsonSerializerContext;
diff --git a/src/Orleans.DurableJobs/Hosting/DurableJobsExtensions.cs b/src/Orleans.DurableJobs/Hosting/DurableJobsExtensions.cs
index f0cd24844a3..7f894661617 100644
--- a/src/Orleans.DurableJobs/Hosting/DurableJobsExtensions.cs
+++ b/src/Orleans.DurableJobs/Hosting/DurableJobsExtensions.cs
@@ -1,10 +1,13 @@
using System.Linq;
+using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Logging;
-using Microsoft.Extensions.Options;
using Orleans.Configuration.Internal;
using Orleans.Runtime;
using Orleans.DurableJobs;
+using Orleans.Journaling;
+using Orleans.Journaling.Json;
namespace Orleans.Hosting;
@@ -32,6 +35,7 @@ public static void AddDurableJobs(this IServiceCollection services)
}
services.AddSingleton();
+ services.AddSingleton();
services.AddSingleton();
services.AddSingleton();
services.AddFromExisting();
@@ -54,8 +58,10 @@ public static void AddDurableJobs(this IServiceCollection services)
public static ISiloBuilder UseInMemoryDurableJobs(this ISiloBuilder builder)
{
builder.AddDurableJobs();
+ builder.AddJournalStorage();
+ builder.UseJsonJournalFormat(options => options.AddTypeInfoResolver(DurableJobsJsonContext.Default));
- builder.ConfigureServices(services => services.UseInMemoryDurableJobs());
+ builder.ConfigureServices(services => services.UseVolatileJournaledDurableJobs());
return builder;
}
@@ -69,14 +75,32 @@ public static ISiloBuilder UseInMemoryDurableJobs(this ISiloBuilder builder)
/// The provided , for chaining.
internal static IServiceCollection UseInMemoryDurableJobs(this IServiceCollection services)
{
- services.AddSingleton(sp =>
- {
- var siloDetails = sp.GetRequiredService();
- var membershipService = sp.GetRequiredService();
- var durableJobsOptions = sp.GetRequiredService>();
- return new InMemoryJobShardManager(siloDetails.SiloAddress, membershipService, durableJobsOptions.Value.MaxAdoptedCount);
- });
- services.AddFromExisting();
+ var builder = new ServiceCollectionSiloBuilder(services);
+ builder.AddJournalStorage();
+ builder.UseJsonJournalFormat(options => options.AddTypeInfoResolver(DurableJobsJsonContext.Default));
+ return services.UseVolatileJournaledDurableJobs();
+ }
+
+ private static IServiceCollection UseVolatileJournaledDurableJobs(this IServiceCollection services)
+ {
+ services.TryAddSingleton();
+ services.AddFromExisting();
+ services.AddFromExisting();
+ services.TryAddSingleton();
+ services.AddFromExisting();
return services;
}
+
+ private sealed class ServiceCollectionSiloBuilder : ISiloBuilder
+ {
+ public ServiceCollectionSiloBuilder(IServiceCollection services)
+ {
+ Services = services;
+ Configuration = new ConfigurationBuilder().Build();
+ }
+
+ public IServiceCollection Services { get; }
+
+ public IConfiguration Configuration { get; }
+ }
}
diff --git a/src/Orleans.DurableJobs/Hosting/DurableJobsOptions.cs b/src/Orleans.DurableJobs/Hosting/DurableJobsOptions.cs
index 133356868a7..751a92a69df 100644
--- a/src/Orleans.DurableJobs/Hosting/DurableJobsOptions.cs
+++ b/src/Orleans.DurableJobs/Hosting/DurableJobsOptions.cs
@@ -1,8 +1,11 @@
using System;
+using System.Collections.Generic;
+using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Orleans.Runtime;
using Orleans.DurableJobs;
+using Orleans.Journaling;
namespace Orleans.Hosting;
@@ -210,3 +213,87 @@ public void ValidateConfiguration()
)]
private static partial void LogInformationOptionsValidated(ILogger logger, TimeSpan shardDuration);
}
+
+internal sealed class DurableJobsJournalingConfigurationValidator : IConfigurationValidator
+{
+ private readonly IServiceProvider _serviceProvider;
+
+ public DurableJobsJournalingConfigurationValidator(IServiceProvider serviceProvider)
+ {
+ _serviceProvider = serviceProvider;
+ }
+
+ public void ValidateConfiguration()
+ {
+ var missingServices = new List();
+ var serviceProviderIsService = _serviceProvider.GetService();
+
+ CheckService(serviceProviderIsService, missingServices);
+ CheckService(serviceProviderIsService, missingServices);
+ CheckService(serviceProviderIsService, missingServices);
+ CheckService(serviceProviderIsService, missingServices);
+
+ if (missingServices.Count > 0)
+ {
+ throw new OrleansConfigurationException(
+ $"DurableJobs requires Orleans.Journaling storage. Configure DurableJobs storage using UseInMemoryDurableJobs() or UseAzureBlobDurableJobs(...) before starting the silo. Missing services: {string.Join(", ", missingServices)}.");
+ }
+
+ var shardManager = ResolveRequiredService();
+ if (shardManager is not JournaledJobShardManager)
+ {
+ throw new OrleansConfigurationException(
+ $"DurableJobs requires the journaled shard manager, but '{shardManager.GetType().FullName}' is registered. Configure DurableJobs storage using UseInMemoryDurableJobs() or UseAzureBlobDurableJobs(...).");
+ }
+ }
+
+ private void CheckService(IServiceProviderIsService? serviceProviderIsService, List missingServices)
+ where TService : class
+ {
+ if (serviceProviderIsService is not null)
+ {
+ if (!serviceProviderIsService.IsService(typeof(TService)))
+ {
+ missingServices.Add(typeof(TService).Name);
+ }
+
+ return;
+ }
+
+ if (ResolveService() is null)
+ {
+ missingServices.Add(typeof(TService).Name);
+ }
+ }
+
+ private TService? ResolveService()
+ where TService : class
+ {
+ try
+ {
+ return _serviceProvider.GetService();
+ }
+ catch (Exception exception)
+ {
+ throw CreateServiceResolutionException(exception);
+ }
+ }
+
+ private TService ResolveRequiredService()
+ where TService : notnull
+ {
+ try
+ {
+ return _serviceProvider.GetRequiredService();
+ }
+ catch (Exception exception)
+ {
+ throw CreateServiceResolutionException(exception);
+ }
+ }
+
+ private static OrleansConfigurationException CreateServiceResolutionException(Exception exception)
+ => new(
+ $"DurableJobs requires Orleans.Journaling storage, but service '{typeof(TService).Name}' could not be resolved. Configure DurableJobs storage using UseInMemoryDurableJobs() or UseAzureBlobDurableJobs(...).",
+ exception);
+}
diff --git a/src/Orleans.DurableJobs/ILocalDurableJobManager.cs b/src/Orleans.DurableJobs/ILocalDurableJobManager.cs
index 9e50a6231af..3bc367cf44f 100644
--- a/src/Orleans.DurableJobs/ILocalDurableJobManager.cs
+++ b/src/Orleans.DurableJobs/ILocalDurableJobManager.cs
@@ -27,3 +27,8 @@ public interface ILocalDurableJobManager
/// A representing the asynchronous operation that returns if the job was successfully canceled; otherwise, .
Task TryCancelDurableJobAsync(DurableJob job, CancellationToken cancellationToken);
}
+
+internal interface ILocalDurableJobManagerSystemTarget : ISystemTarget
+{
+ Task TryCancelDurableJobAsync(DurableJob job, CancellationToken cancellationToken);
+}
diff --git a/src/Orleans.DurableJobs/InMemoryJobQueue.cs b/src/Orleans.DurableJobs/InMemoryJobQueue.cs
index 55db1811620..e687ed3c00e 100644
--- a/src/Orleans.DurableJobs/InMemoryJobQueue.cs
+++ b/src/Orleans.DurableJobs/InMemoryJobQueue.cs
@@ -12,6 +12,7 @@ namespace Orleans.DurableJobs;
///
internal sealed class InMemoryJobQueue : IAsyncEnumerable
{
+ private readonly TimeProvider _timeProvider;
private readonly PriorityQueue _queue = new();
private readonly Dictionary _jobsIdToBucket = new();
private readonly Dictionary _buckets = new();
@@ -22,6 +23,11 @@ internal sealed class InMemoryJobQueue : IAsyncEnumerable
private readonly object _syncLock = new();
#endif
+ public InMemoryJobQueue(TimeProvider? timeProvider = null)
+ {
+ _timeProvider = timeProvider ?? TimeProvider.System;
+ }
+
///
/// Gets the total number of jobs currently in the queue.
///
@@ -37,6 +43,10 @@ internal sealed class InMemoryJobQueue : IAsyncEnumerable
public void Enqueue(DurableJob job, int dequeueCount)
{
ArgumentNullException.ThrowIfNull(job);
+ if (dequeueCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(dequeueCount));
+ }
lock (_syncLock)
{
@@ -97,27 +107,83 @@ public bool CancelJob(string jobId)
///
public void RetryJobLater(IJobRunContext jobContext, DateTimeOffset newDueTime)
{
- var jobId = jobContext.Job.Id;
- var newJob = new DurableJob
+ ArgumentNullException.ThrowIfNull(jobContext);
+ _ = RetryJobLater(jobContext.Job.Id, newDueTime, jobContext.DequeueCount);
+ }
+
+ ///
+ /// Reschedules a job for retry with a new due time.
+ ///
+ /// The unique identifier of the job to retry.
+ /// The new due time for the job.
+ /// The persisted dequeue count to associate with the retried job.
+ /// True if the job was found and rescheduled; false if the job was not found.
+ public bool RetryJobLater(string jobId, DateTimeOffset newDueTime, int dequeueCount)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
+ if (dequeueCount < 0)
{
- Id = jobContext.Job.Id,
- Name = jobContext.Job.Name,
- DueTime = newDueTime,
- TargetGrainId = jobContext.Job.TargetGrainId,
- ShardId = jobContext.Job.ShardId,
- Metadata = jobContext.Job.Metadata
- };
+ throw new ArgumentOutOfRangeException(nameof(dequeueCount));
+ }
lock (_syncLock)
{
- if (_jobsIdToBucket.TryGetValue(jobId, out var oldBucket))
+ if (!_jobsIdToBucket.TryGetValue(jobId, out var oldBucket) || !oldBucket.TryGetJob(jobId, out var existing))
{
- oldBucket.RemoveJob(jobId);
- _jobsIdToBucket.Remove(jobId);
- var newBucket = GetJobBucket(newDueTime);
- newBucket.AddJob(newJob, jobContext.DequeueCount);
- _jobsIdToBucket[jobId] = newBucket;
+ return false;
}
+
+ var newJob = new DurableJob
+ {
+ Id = existing.Job.Id,
+ Name = existing.Job.Name,
+ DueTime = newDueTime,
+ TargetGrainId = existing.Job.TargetGrainId,
+ ShardId = existing.Job.ShardId,
+ Metadata = existing.Job.Metadata
+ };
+
+ oldBucket.RemoveJob(jobId);
+ _jobsIdToBucket.Remove(jobId);
+ var newBucket = GetJobBucket(newDueTime);
+ newBucket.AddJob(newJob, dequeueCount);
+ _jobsIdToBucket[jobId] = newBucket;
+ return true;
+ }
+ }
+
+ ///
+ /// Gets a point-in-time snapshot of live jobs and their persisted dequeue counts.
+ ///
+ /// The current live jobs and dequeue counts.
+ public IReadOnlyList<(DurableJob Job, int DequeueCount)> GetSnapshot()
+ {
+ lock (_syncLock)
+ {
+ var result = new List<(DurableJob Job, int DequeueCount)>(_jobsIdToBucket.Count);
+ foreach (var (jobId, bucket) in _jobsIdToBucket)
+ {
+ if (bucket.TryGetJob(jobId, out var item))
+ {
+ result.Add(item);
+ }
+ }
+
+ return result;
+ }
+ }
+
+ ///
+ /// Clears all queue state.
+ ///
+ public void Clear()
+ {
+ lock (_syncLock)
+ {
+ _queue.Clear();
+ _jobsIdToBucket.Clear();
+ _buckets.Clear();
+ _isComplete = false;
}
}
@@ -131,7 +197,7 @@ public void RetryJobLater(IJobRunContext jobContext, DateTimeOffset newDueTime)
///
public async IAsyncEnumerator GetAsyncEnumerator(CancellationToken cancellationToken = default)
{
- using var timer = new PeriodicTimer(TimeSpan.FromSeconds(1));
+ using var timer = new PeriodicTimer(TimeSpan.FromSeconds(1), _timeProvider);
while (true)
{
JobBucket? bucketToProcess = null;
@@ -149,7 +215,7 @@ public async IAsyncEnumerator GetAsyncEnumerator(CancellationTok
else if (_queue.Count > 0)
{
var nextBucket = _queue.Peek();
- if (nextBucket.DueTime < DateTimeOffset.UtcNow)
+ if (nextBucket.DueTime < _timeProvider.GetUtcNow())
{
// Dequeue the entire bucket to process outside the lock
bucketToProcess = _queue.Dequeue();
@@ -230,4 +296,9 @@ public bool RemoveJob(string jobId)
{
return _jobs.Remove(jobId);
}
+
+ public bool TryGetJob(string jobId, out (DurableJob Job, int DequeueCount) job)
+ {
+ return _jobs.TryGetValue(jobId, out job);
+ }
}
diff --git a/src/Orleans.DurableJobs/InMemoryJobShard.cs b/src/Orleans.DurableJobs/InMemoryJobShard.cs
deleted file mode 100644
index 148a14b0716..00000000000
--- a/src/Orleans.DurableJobs/InMemoryJobShard.cs
+++ /dev/null
@@ -1,33 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Threading;
-using System.Threading.Tasks;
-using Orleans.Runtime;
-
-namespace Orleans.DurableJobs;
-
-[DebuggerDisplay("ShardId={Id}, StartTime={StartTime}, EndTime={EndTime}")]
-internal sealed class InMemoryJobShard : JobShard
-{
- public InMemoryJobShard(string shardId, DateTimeOffset minDueTime, DateTimeOffset maxDueTime, IDictionary? metadata)
- : base(shardId, minDueTime, maxDueTime)
- {
- Metadata = metadata;
- }
-
- protected override Task PersistAddJobAsync(string jobId, string jobName, DateTimeOffset dueTime, GrainId target, IReadOnlyDictionary? metadata, CancellationToken cancellationToken)
- {
- return Task.CompletedTask;
- }
-
- protected override Task PersistRemoveJobAsync(string jobId, CancellationToken cancellationToken)
- {
- return Task.CompletedTask;
- }
-
- protected override Task PersistRetryJobAsync(string jobId, DateTimeOffset newDueTime, CancellationToken cancellationToken)
- {
- return Task.CompletedTask;
- }
-}
diff --git a/src/Orleans.DurableJobs/JobShardId.cs b/src/Orleans.DurableJobs/JobShardId.cs
new file mode 100644
index 00000000000..a6b6e14480a
--- /dev/null
+++ b/src/Orleans.DurableJobs/JobShardId.cs
@@ -0,0 +1,68 @@
+using System;
+using Orleans.Journaling;
+
+namespace Orleans.DurableJobs;
+
+internal readonly record struct JobShardId
+{
+ private const string RootSegment = "jobs";
+ private const string ShardsSegment = "shards";
+
+ public JobShardId(string value)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(value);
+ Value = value;
+ }
+
+ public string Value { get; }
+
+ public static JournalId StoragePrefix => JournalId.Create(RootSegment, ShardsSegment);
+
+ public static JobShardId New() => new(Guid.NewGuid().ToString("N"));
+
+ public static JobShardId Parse(string value) => new(value);
+
+ public static JobShardId FromJournalId(JournalId journalId)
+ {
+ if (journalId.IsDefault)
+ {
+ throw new ArgumentException("The journal id must not be the default value.", nameof(journalId));
+ }
+
+ var segments = DecodeSegments(journalId.Value);
+ if (segments.Length != 3
+ || !string.Equals(segments[0], RootSegment, StringComparison.Ordinal)
+ || !string.Equals(segments[1], ShardsSegment, StringComparison.Ordinal))
+ {
+ throw new ArgumentException($"Journal id '{journalId}' is not a DurableJobs shard journal id.", nameof(journalId));
+ }
+
+ return new(segments[2]);
+ }
+
+ public JournalId ToJournalId() => JournalId.Create(RootSegment, ShardsSegment, Value);
+
+ public override string ToString() => Value;
+
+ private static string[] DecodeSegments(string value)
+ {
+ if (value[0] == '/' || value[^1] == '/')
+ {
+ throw new ArgumentException("A journal id must not start or end with a separator.", nameof(value));
+ }
+
+ var encodedSegments = value.Split('/');
+ var decodedSegments = new string[encodedSegments.Length];
+ for (var i = 0; i < encodedSegments.Length; i++)
+ {
+ if (encodedSegments[i].Length == 0)
+ {
+ throw new ArgumentException("A journal id must not contain empty segments.", nameof(value));
+ }
+
+ decodedSegments[i] = Uri.UnescapeDataString(encodedSegments[i]);
+ }
+
+ return decodedSegments;
+ }
+}
diff --git a/src/Orleans.DurableJobs/JobShardManager.cs b/src/Orleans.DurableJobs/JobShardManager.cs
index 51a5feaa677..5a1a9408768 100644
--- a/src/Orleans.DurableJobs/JobShardManager.cs
+++ b/src/Orleans.DurableJobs/JobShardManager.cs
@@ -1,6 +1,5 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Orleans.Runtime;
@@ -9,8 +8,10 @@ namespace Orleans.DurableJobs;
///
/// Manages the lifecycle of job shards for a specific silo.
-/// Each silo instance has its own shard manager.
///
+///
+/// Each silo instance has its own shard manager.
+///
public abstract class JobShardManager
{
///
@@ -57,209 +58,8 @@ protected JobShardManager(SiloAddress siloAddress)
/// Cancellation token.
/// A task representing the asynchronous operation.
public abstract Task UnregisterShardAsync(IJobShard shard, CancellationToken cancellationToken);
-}
-
-internal class InMemoryJobShardManager : JobShardManager
-{
- // Shared storage across all manager instances to support multi-silo scenarios
- private static readonly Dictionary _globalShardStore = new();
- private static readonly SemaphoreSlim _asyncLock = new(1, 1);
- private readonly IClusterMembershipService? _membershipService;
- private readonly int _maxAdoptedCount;
-
- public InMemoryJobShardManager(SiloAddress siloAddress) : this(siloAddress, null, 3)
- {
- }
-
- public InMemoryJobShardManager(SiloAddress siloAddress, IClusterMembershipService? membershipService) : this(siloAddress, membershipService, 3)
- {
- }
-
- public InMemoryJobShardManager(SiloAddress siloAddress, IClusterMembershipService? membershipService, int maxAdoptedCount) : base(siloAddress)
- {
- _membershipService = membershipService;
- _maxAdoptedCount = maxAdoptedCount;
- }
-
- ///
- /// Clears all shards from the global store. For testing purposes only.
- ///
- internal static async Task ClearAllShardsAsync()
- {
- await _asyncLock.WaitAsync();
- try
- {
- _globalShardStore.Clear();
- }
- finally
- {
- _asyncLock.Release();
- }
- }
-
- ///
- /// Gets ownership info for a shard. For testing purposes only.
- ///
- internal static async Task<(string? Owner, int AdoptedCount)?> GetOwnershipInfoAsync(string shardId)
- {
- await _asyncLock.WaitAsync();
- try
- {
- if (_globalShardStore.TryGetValue(shardId, out var ownership))
- {
- return (ownership.OwnerSiloAddress, ownership.AdoptedCount);
- }
- return null;
- }
- finally
- {
- _asyncLock.Release();
- }
- }
-
- public override async Task> AssignJobShardsAsync(DateTimeOffset maxDueTime, int maxNewClaims, CancellationToken cancellationToken)
- {
- var alreadyOwnedShards = new List();
- var adoptedShards = new List();
-
- await _asyncLock.WaitAsync(cancellationToken);
- try
- {
- var snapshot = _membershipService?.CurrentSnapshot;
- var deadSilos = new HashSet();
-
- if (snapshot is not null)
- {
- foreach (var member in snapshot.Members.Values)
- {
- if (member.Status == SiloStatus.Dead)
- {
- deadSilos.Add(member.SiloAddress.ToString());
- }
- }
- }
-
- // Assign shards from dead silos or orphaned shards
- foreach (var kvp in _globalShardStore)
- {
- var shardId = kvp.Key;
- var ownership = kvp.Value;
-
- // Skip shards that are already owned by this silo
- if (ownership.OwnerSiloAddress == SiloAddress.ToString())
- {
- if (ownership.Shard.StartTime <= maxDueTime)
- {
- alreadyOwnedShards.Add(ownership.Shard);
- }
- continue;
- }
-
- // Check if this is an orphaned shard (gracefully released) or adopted (from dead silo)
- var isOrphaned = ownership.OwnerSiloAddress is null;
- var ownerAddress = ownership.OwnerSiloAddress;
- var isFromDeadSilo = ownerAddress is not null && deadSilos.Contains(ownerAddress);
-
- if (isOrphaned || isFromDeadSilo)
- {
- if (ownership.Shard.StartTime <= maxDueTime)
- {
- // Respect the slow-start budget: skip claiming if we've exhausted the budget.
- // This must be checked before incrementing AdoptedCount to avoid
- // inflating the count when the shard isn't actually claimed.
- if (adoptedShards.Count >= maxNewClaims)
- {
- continue;
- }
-
- // If adopted from dead silo, increment adopted count
- if (isFromDeadSilo)
- {
- ownership.AdoptedCount++;
-
- // Check if shard is poisoned
- if (ownership.AdoptedCount > _maxAdoptedCount)
- {
- // Shard is poisoned - don't assign it
- continue;
- }
- }
-
- ownership.OwnerSiloAddress = SiloAddress.ToString();
- adoptedShards.Add(ownership.Shard);
- }
- }
- }
- }
- finally
- {
- _asyncLock.Release();
- }
- foreach (var shard in adoptedShards)
- {
- // Mark adopted shards as complete
- await shard.MarkAsCompleteAsync(CancellationToken.None);
- }
+ internal virtual ValueTask GetShardOwnerAsync(string shardId, CancellationToken cancellationToken) => new((SiloAddress?)null);
- return [.. alreadyOwnedShards, .. adoptedShards];
- }
-
- public override async Task CreateShardAsync(DateTimeOffset minDueTime, DateTimeOffset maxDueTime, IDictionary metadata, CancellationToken cancellationToken)
- {
- await _asyncLock.WaitAsync(cancellationToken);
- try
- {
- var shardId = $"{SiloAddress}-{Guid.NewGuid()}";
- var newShard = new InMemoryJobShard(shardId, minDueTime, maxDueTime, metadata);
-
- _globalShardStore[shardId] = new ShardOwnership
- {
- Shard = newShard,
- OwnerSiloAddress = SiloAddress.ToString()
- };
-
- return newShard;
- }
- finally
- {
- _asyncLock.Release();
- }
- }
-
- public override async Task UnregisterShardAsync(IJobShard shard, CancellationToken cancellationToken)
- {
- var jobCount = await shard.GetJobCountAsync();
-
- await _asyncLock.WaitAsync(cancellationToken);
- try
- {
- // Only remove shards that have no jobs remaining
- if (_globalShardStore.TryGetValue(shard.Id, out var ownership))
- {
- if (jobCount == 0)
- {
- _globalShardStore.Remove(shard.Id);
- }
- else
- {
- // Mark as unowned so another silo can pick it up
- ownership.OwnerSiloAddress = null;
- // Reset adopted count since we're gracefully releasing (not crashing)
- ownership.AdoptedCount = 0;
- }
- }
- }
- finally
- {
- _asyncLock.Release();
- }
- }
-
- private sealed class ShardOwnership
- {
- public required IJobShard Shard { get; init; }
- public string? OwnerSiloAddress { get; set; }
- public int AdoptedCount { get; set; }
- }
+ internal virtual ValueTask IsShardOwnedByLocalSiloAsync(string shardId, CancellationToken cancellationToken) => new(true);
}
diff --git a/src/Orleans.DurableJobs/JournaledJobShard.cs b/src/Orleans.DurableJobs/JournaledJobShard.cs
new file mode 100644
index 00000000000..5d66d778aad
--- /dev/null
+++ b/src/Orleans.DurableJobs/JournaledJobShard.cs
@@ -0,0 +1,229 @@
+using System;
+using System.Collections.Generic;
+using System.Threading;
+using System.Threading.Tasks;
+using Orleans.Journaling;
+
+namespace Orleans.DurableJobs;
+
+/// <summary>
+/// Journaled implementation of <see cref="IJobShard"/> that stores shard state in Orleans journaling storage.
+/// </summary>
+internal sealed class JournaledJobShard : IJobShard
+{
+ private readonly JournaledJobShardState _state;
+ private readonly IJournaledStateManager _stateManager;
+ private readonly JournaledJobShardManager _shardManager;
+ private readonly SemaphoreSlim _operationLock = new(1, 1);
+ private int _disposed;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="JournaledJobShard"/> class.
+ /// </summary>
+ /// <param name="shardId">The unique identifier for this job shard.</param>
+ /// <param name="startTime">The start time of the time range managed by this shard.</param>
+ /// <param name="endTime">The end time of the time range managed by this shard.</param>
+ /// <param name="metadata">Optional metadata associated with this job shard.</param>
+ /// <param name="isClosed">A value indicating whether this shard is closed to new jobs.</param>
+ /// <param name="state">The journaled shard state.</param>
+ /// <param name="stateManager">The manager used to persist journaled state.</param>
+ /// <param name="shardManager">The shard manager that owns this shard.</param>
+ public JournaledJobShard(
+ JobShardId shardId,
+ DateTimeOffset startTime,
+ DateTimeOffset endTime,
+ IReadOnlyDictionary? metadata,
+ bool isClosed,
+ JournaledJobShardState state,
+ IJournaledStateManager stateManager,
+ JournaledJobShardManager shardManager)
+ {
+ ArgumentNullException.ThrowIfNull(state);
+ ArgumentNullException.ThrowIfNull(stateManager);
+ ArgumentNullException.ThrowIfNull(shardManager);
+
+ Id = shardId.Value;
+ StartTime = startTime;
+ EndTime = endTime;
+ Metadata = metadata is { Count: > 0 } ? new Dictionary(metadata, StringComparer.Ordinal) : null;
+ _state = state;
+ _stateManager = stateManager;
+ _shardManager = shardManager;
+
+ if (isClosed)
+ {
+ _state.MarkAsComplete();
+ }
+ }
+
+ /// <inheritdoc/>
+ public string Id { get; }
+
+ /// <inheritdoc/>
+ public DateTimeOffset StartTime { get; }
+
+ /// <inheritdoc/>
+ public DateTimeOffset EndTime { get; }
+
+ /// <inheritdoc/>
+ public IDictionary? Metadata { get; }
+
+ /// <inheritdoc/>
+ public bool IsAddingCompleted => _state.IsAddingCompleted;
+
+ /// <summary>
+ /// Gets the backing journal identifier for this shard.
+ /// </summary>
+ internal JournalId StorageId => JobShardId.Parse(Id).ToJournalId();
+
+ /// <inheritdoc/>
+ public IAsyncEnumerable ConsumeDurableJobsAsync() => _state.ConsumeDurableJobsAsync();
+
+ /// <inheritdoc/>
+ public ValueTask GetJobCountAsync() => ValueTask.FromResult(_state.Count);
+
+ /// <inheritdoc/>
+ public async Task MarkAsCompleteAsync(CancellationToken cancellationToken)
+ {
+ ThrowIfDisposed();
+
+ await _operationLock.WaitAsync(cancellationToken);
+ try
+ {
+ if (_state.IsAddingCompleted)
+ {
+ return;
+ }
+
+ if (await _shardManager.TryMarkShardClosedAsync(Id, cancellationToken))
+ {
+ _state.MarkAsComplete();
+ }
+ }
+ finally
+ {
+ _operationLock.Release();
+ }
+ }
+
+ /// <inheritdoc/>
+ public async Task RemoveJobAsync(string jobId, CancellationToken cancellationToken)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
+ ThrowIfDisposed();
+
+ await _operationLock.WaitAsync(cancellationToken);
+ try
+ {
+ if (!await _shardManager.IsShardOwnedByLocalSiloAsync(Id, cancellationToken))
+ {
+ return false;
+ }
+
+ var removed = _state.RemoveJob(jobId);
+ await _stateManager.WriteStateAsync(cancellationToken);
+ return removed;
+ }
+ finally
+ {
+ _operationLock.Release();
+ }
+ }
+
+ /// <inheritdoc/>
+ public async Task RetryJobLaterAsync(IJobRunContext jobContext, DateTimeOffset newDueTime, CancellationToken cancellationToken)
+ {
+ ArgumentNullException.ThrowIfNull(jobContext);
+ ThrowIfDisposed();
+
+ await _operationLock.WaitAsync(cancellationToken);
+ try
+ {
+ if (!await _shardManager.IsShardOwnedByLocalSiloAsync(Id, cancellationToken))
+ {
+ return;
+ }
+
+ _state.RetryJobLater(jobContext, newDueTime);
+ await _stateManager.WriteStateAsync(cancellationToken);
+ }
+ finally
+ {
+ _operationLock.Release();
+ }
+ }
+
+ /// <inheritdoc/>
+ public async Task TryScheduleJobAsync(ScheduleJobRequest request, CancellationToken cancellationToken)
+ {
+ ThrowIfDisposed();
+
+ await _operationLock.WaitAsync(cancellationToken);
+ try
+ {
+ if (_state.IsAddingCompleted)
+ {
+ return null;
+ }
+
+ if (!await _shardManager.IsShardOwnedByLocalSiloAsync(Id, cancellationToken))
+ {
+ return null;
+ }
+
+ var job = _state.TryScheduleJob(request);
+ if (job is null)
+ {
+ return null;
+ }
+
+ await _stateManager.WriteStateAsync(cancellationToken);
+ return job;
+ }
+ finally
+ {
+ _operationLock.Release();
+ }
+ }
+
+ /// <summary>
+ /// Deletes this shard's journaled state.
+ /// </summary>
+ /// <param name="cancellationToken">A token to cancel the operation.</param>
+ /// <returns>A task that represents the asynchronous operation.</returns>
+ internal async ValueTask DeleteStateAsync(CancellationToken cancellationToken)
+ {
+ ThrowIfDisposed();
+
+ await _operationLock.WaitAsync(cancellationToken);
+ try
+ {
+ await _stateManager.DeleteStateAsync(cancellationToken);
+ }
+ finally
+ {
+ _operationLock.Release();
+ }
+ }
+
+ /// <inheritdoc/>
+ public async ValueTask DisposeAsync()
+ {
+ if (Interlocked.Exchange(ref _disposed, 1) != 0)
+ {
+ return;
+ }
+
+ try
+ {
+ await _stateManager.DisposeAsync();
+ }
+ finally
+ {
+ _operationLock.Dispose();
+ GC.SuppressFinalize(this);
+ }
+ }
+
+ private void ThrowIfDisposed() => ObjectDisposedException.ThrowIf(_disposed != 0, this);
+}
diff --git a/src/Orleans.DurableJobs/JournaledJobShardManager.cs b/src/Orleans.DurableJobs/JournaledJobShardManager.cs
new file mode 100644
index 00000000000..0ca5e9fab9c
--- /dev/null
+++ b/src/Orleans.DurableJobs/JournaledJobShardManager.cs
@@ -0,0 +1,560 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Options;
+using Orleans.Hosting;
+using Orleans.Journaling;
+using Orleans.Runtime;
+
+namespace Orleans.DurableJobs;
+
+internal sealed class JournaledJobShardManager : JobShardManager
+{
+ private const string OwnerProperty = "DurableJobsOwner";
+ private const string MembershipVersionProperty = "DurableJobsMembershipVersion";
+ private const string MinDueTimeProperty = "DurableJobsMinDueTime";
+ private const string MaxDueTimeProperty = "DurableJobsMaxDueTime";
+ private const string AdoptedCountProperty = "DurableJobsAdoptedCount";
+ private const string LastAdoptedTimeProperty = "DurableJobsLastAdoptedTime";
+ private const string PoisonedProperty = "DurableJobsPoisoned";
+ private const string ClosedProperty = "DurableJobsClosed";
+ private const string MetadataPropertyPrefix = "DurableJobsMetadata_";
+
+ private readonly IJournaledStateManagerFactory _stateManagerFactory;
+ private readonly IJournalStorageProvider _storageProvider;
+ private readonly IJournalStorageCatalog _catalog;
+ private readonly IClusterMembershipService _membershipService;
+ private readonly IServiceProvider _serviceProvider;
+ private readonly DurableJobsOptions _options;
+ private readonly JournaledStateManagerOptions _journaledStateManagerOptions;
+ private readonly TimeProvider _timeProvider;
+ private readonly ConcurrentDictionary _jobShardCache = new();
+
+ public JournaledJobShardManager(
+ ILocalSiloDetails localSiloDetails,
+ IJournaledStateManagerFactory stateManagerFactory,
+ IJournalStorageProvider storageProvider,
+ IJournalStorageCatalog catalog,
+ IClusterMembershipService membershipService,
+ IServiceProvider serviceProvider,
+ IOptions options,
+ IOptions journaledStateManagerOptions)
+ : base(GetSiloAddress(localSiloDetails))
+ {
+ ArgumentNullException.ThrowIfNull(localSiloDetails);
+ ArgumentNullException.ThrowIfNull(stateManagerFactory);
+ ArgumentNullException.ThrowIfNull(storageProvider);
+ ArgumentNullException.ThrowIfNull(catalog);
+ ArgumentNullException.ThrowIfNull(membershipService);
+ ArgumentNullException.ThrowIfNull(serviceProvider);
+ ArgumentNullException.ThrowIfNull(options);
+ ArgumentNullException.ThrowIfNull(journaledStateManagerOptions);
+
+ _stateManagerFactory = stateManagerFactory;
+ _storageProvider = storageProvider;
+ _catalog = catalog;
+ _membershipService = membershipService;
+ _serviceProvider = serviceProvider;
+ _options = options.Value;
+ _journaledStateManagerOptions = journaledStateManagerOptions.Value;
+ _timeProvider = serviceProvider.GetService() ?? TimeProvider.System;
+ }
+
+ private static SiloAddress GetSiloAddress(ILocalSiloDetails localSiloDetails)
+ {
+ ArgumentNullException.ThrowIfNull(localSiloDetails);
+ return localSiloDetails.SiloAddress;
+ }
+
+ public override async Task> AssignJobShardsAsync(DateTimeOffset maxDueTime, int maxNewClaims, CancellationToken cancellationToken)
+ {
+ var result = new List();
+ var newClaimCount = 0;
+ var membershipSnapshot = _membershipService.CurrentSnapshot;
+
+ await foreach (var storageId in _catalog.ListAsync(JobShardId.StoragePrefix, cancellationToken))
+ {
+ var descriptor = await GetDescriptorAsync(storageId, cancellationToken);
+ if (descriptor is null || descriptor.Poisoned || descriptor.StartTime > maxDueTime)
+ {
+ continue;
+ }
+
+ if (descriptor.MembershipVersion > membershipSnapshot.Version)
+ {
+ // Refresh membership to at least that version.
+ await _membershipService.Refresh(descriptor.MembershipVersion, cancellationToken);
+ membershipSnapshot = _membershipService.CurrentSnapshot;
+ }
+
+ if (descriptor.Owner is { } owner && owner.Equals(SiloAddress))
+ {
+ result.Add(await GetOrOpenShardAsync(descriptor, cancellationToken));
+ continue;
+ }
+
+ // Determine if this is an adopted shard (taken from dead owner) vs orphaned (gracefully released).
+ var isAdopted = false;
+ if (descriptor.Owner is { } previousOwner)
+ {
+ var ownerStatus = membershipSnapshot.GetSiloStatus(previousOwner);
+ if (ownerStatus is not SiloStatus.Dead and not SiloStatus.None)
+ {
+ // Owner is still active and it's not me, skip this shard.
+ continue;
+ }
+
+ isAdopted = ownerStatus == SiloStatus.Dead;
+ }
+
+ // Respect the slow-start budget: skip claiming if we've exhausted the budget.
+ // This must be checked before incrementing the adopted count to avoid
+ // inflating the count when the shard isn't actually claimed.
+ if (newClaimCount >= maxNewClaims)
+ {
+ continue;
+ }
+
+ // Try to claim orphaned or adopted shard.
+ var claimedShard = await TryClaimShardAsync(descriptor, isAdopted, cancellationToken);
+ if (claimedShard is null)
+ {
+ // Either poisoned shard or someone else took ownership.
+ continue;
+ }
+
+ _jobShardCache[claimedShard.Id] = claimedShard;
+ result.Add(claimedShard);
+ newClaimCount++;
+ }
+
+ return result;
+ }
+
+ public override async Task CreateShardAsync(DateTimeOffset minDueTime, DateTimeOffset maxDueTime, IDictionary metadata, CancellationToken cancellationToken)
+ {
+ while (true)
+ {
+ var shardId = JobShardId.New();
+ var storageId = shardId.ToJournalId();
+ var initialProperties = CreateInitialProperties(minDueTime, maxDueTime, metadata);
+ var storage = _storageProvider.CreateStorage(storageId);
+ if (!await storage.CreateIfNotExistsAsync(initialProperties, cancellationToken))
+ {
+ continue;
+ }
+
+ var properties = await storage.GetMetadataAsync(cancellationToken);
+ var descriptor = properties is not null ? ShardCatalogProperties.From(storageId, properties) : null;
+ if (descriptor is null)
+ {
+ throw new InvalidOperationException($"Created DurableJobs shard '{shardId}' without readable journal storage properties.");
+ }
+
+ var shard = await OpenShardAsync(descriptor, cancellationToken);
+ _jobShardCache[shard.Id] = shard;
+ return shard;
+ }
+ }
+
+ public override async Task UnregisterShardAsync(IJobShard shard, CancellationToken cancellationToken)
+ {
+ var journaledShard = shard as JournaledJobShard
+ ?? throw new ArgumentException("Shard is not a journaled DurableJobs shard.", nameof(shard));
+
+ try
+ {
+ var descriptor = await GetDescriptorAsync(journaledShard.StorageId, cancellationToken)
+ ?? throw new InvalidOperationException($"Cannot unregister DurableJobs shard '{shard.Id}' because its catalog properties were not found.");
+
+ if (descriptor.Owner is null || !descriptor.Owner.Equals(SiloAddress))
+ {
+ throw new InvalidOperationException("Cannot unregister a DurableJobs shard owned by another silo.");
+ }
+
+ var count = await shard.GetJobCountAsync();
+ if (count == 0)
+ {
+ // No jobs left, we can delete the shard.
+ await journaledShard.DeleteStateAsync(cancellationToken);
+ }
+ else
+ {
+ // There are still jobs in the shard, release ownership gracefully.
+ var updatedMetadata = await UpdateMetadataAsync(
+ descriptor,
+ new Dictionary(StringComparer.Ordinal)
+ {
+ [ClosedProperty] = bool.TrueString,
+ [MembershipVersionProperty] = GetMembershipVersionString()
+ },
+ [OwnerProperty, AdoptedCountProperty, LastAdoptedTimeProperty],
+ cancellationToken);
+
+ if (updatedMetadata is null)
+ {
+ throw new InvalidOperationException($"Failed to release DurableJobs shard '{shard.Id}' ownership.");
+ }
+ }
+ }
+ finally
+ {
+ _jobShardCache.TryRemove(shard.Id, out _);
+ await journaledShard.DisposeAsync();
+ }
+ }
+
+ internal override async ValueTask GetShardOwnerAsync(string shardId, CancellationToken cancellationToken)
+ {
+ var descriptor = await GetDescriptorAsync(shardId, cancellationToken);
+ if (descriptor is null || descriptor.Poisoned || descriptor.Owner is null)
+ {
+ return null;
+ }
+
+ if (descriptor.Owner.Equals(SiloAddress))
+ {
+ return descriptor.Owner;
+ }
+
+ var membershipSnapshot = _membershipService.CurrentSnapshot;
+ if (descriptor.MembershipVersion > membershipSnapshot.Version)
+ {
+ await _membershipService.Refresh(descriptor.MembershipVersion, cancellationToken);
+ membershipSnapshot = _membershipService.CurrentSnapshot;
+ }
+
+ return membershipSnapshot.GetSiloStatus(descriptor.Owner) == SiloStatus.Active ? descriptor.Owner : null;
+ }
+
+ internal override async ValueTask IsShardOwnedByLocalSiloAsync(string shardId, CancellationToken cancellationToken)
+ {
+ var descriptor = await GetDescriptorAsync(shardId, cancellationToken);
+ return descriptor is { Poisoned: false, Owner: { } owner } && owner.Equals(SiloAddress);
+ }
+
+ internal async ValueTask TryMarkShardClosedAsync(string shardId, CancellationToken cancellationToken)
+ {
+ for (var attempt = 0; attempt < 3; attempt++)
+ {
+ var descriptor = await GetDescriptorAsync(shardId, cancellationToken);
+ if (descriptor is null || descriptor.Poisoned || descriptor.Owner is null || !descriptor.Owner.Equals(SiloAddress))
+ {
+ return false;
+ }
+
+ if (descriptor.Closed)
+ {
+ return true;
+ }
+
+ var result = await UpdateMetadataAsync(
+ descriptor,
+ new Dictionary(StringComparer.Ordinal)
+ {
+ [ClosedProperty] = bool.TrueString,
+ [MembershipVersionProperty] = GetMembershipVersionString()
+ },
+ remove: null,
+ cancellationToken);
+ if (result is not null)
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ private async ValueTask TryClaimShardAsync(ShardCatalogProperties descriptor, bool isAdopted, CancellationToken cancellationToken)
+ {
+ var adoptedCount = descriptor.AdoptedCount;
+ var set = new Dictionary(StringComparer.Ordinal)
+ {
+ [OwnerProperty] = SiloAddress.ToParsableString(),
+ [MembershipVersionProperty] = GetMembershipVersionString(),
+ // We don't want to add new jobs to shards that we just took ownership of.
+ [ClosedProperty] = bool.TrueString
+ };
+ List? remove = null;
+
+ if (isAdopted)
+ {
+ // Increment adopted count for shards taken from dead owners.
+ adoptedCount++;
+ if (adoptedCount > _options.MaxAdoptedCount)
+ {
+ // Persist poisoned marker so this shard is not repeatedly re-evaluated as newly poisoned.
+ await TryMarkShardPoisonedAsync(descriptor, adoptedCount, cancellationToken);
+ return null;
+ }
+
+ set[AdoptedCountProperty] = adoptedCount.ToString(CultureInfo.InvariantCulture);
+ set[LastAdoptedTimeProperty] = _timeProvider.GetUtcNow().ToString("O", CultureInfo.InvariantCulture);
+ }
+ else
+ {
+ // Not an adoption (shard has no owner, or its owner is unknown to membership): reset the adopted count.
+ set[AdoptedCountProperty] = "0";
+ remove = [LastAdoptedTimeProperty];
+ }
+
+ var updatedMetadata = await UpdateMetadataAsync(descriptor, set, remove, cancellationToken);
+ if (updatedMetadata is null)
+ {
+ return null;
+ }
+
+ var updatedDescriptor = ShardCatalogProperties.From(descriptor.StorageId, updatedMetadata);
+ return updatedDescriptor is null || updatedDescriptor.Owner is null || !updatedDescriptor.Owner.Equals(SiloAddress)
+ ? null
+ : await OpenShardAsync(updatedDescriptor, cancellationToken);
+ }
+
+ private async Task TryMarkShardPoisonedAsync(ShardCatalogProperties descriptor, int adoptedCount, CancellationToken cancellationToken)
+ {
+ await UpdateMetadataAsync(
+ descriptor,
+ new Dictionary(StringComparer.Ordinal)
+ {
+ [PoisonedProperty] = bool.TrueString,
+ [AdoptedCountProperty] = adoptedCount.ToString(CultureInfo.InvariantCulture),
+ [LastAdoptedTimeProperty] = _timeProvider.GetUtcNow().ToString("O", CultureInfo.InvariantCulture),
+ [MembershipVersionProperty] = GetMembershipVersionString()
+ },
+ remove: null,
+ cancellationToken);
+ }
+
+    private async ValueTask GetOrOpenShardAsync(ShardCatalogProperties descriptor, CancellationToken cancellationToken)
+    {
+        if (_jobShardCache.TryGetValue(descriptor.ShardId.Value, out var existing))
+        {
+            return existing;
+        }
+
+        // Publish atomically; on a lost race keep the cached instance instead of re-reading an entry that may be gone.
+        var shard = await OpenShardAsync(descriptor, cancellationToken);
+        var cached = _jobShardCache.GetOrAdd(shard.Id, shard);
+        if (!ReferenceEquals(cached, shard))
+        {
+            await shard.DisposeAsync();
+        }
+        return cached;
+    }
+
+ private async ValueTask OpenShardAsync(ShardCatalogProperties descriptor, CancellationToken cancellationToken)
+ {
+ var codec = CreateOperationCodec();
+ var state = new JournaledJobShardState(descriptor.ShardId, descriptor.StartTime, descriptor.EndTime, codec, _timeProvider);
+ var manager = _stateManagerFactory.Create(descriptor.StorageId);
+ try
+ {
+ manager.RegisterState(JournaledJobShardState.StateName, state);
+ await manager.InitializeAsync(cancellationToken).ConfigureAwait(false);
+ }
+ catch
+ {
+ await manager.DisposeAsync().ConfigureAwait(false);
+ throw;
+ }
+
+ return new JournaledJobShard(
+ descriptor.ShardId,
+ descriptor.StartTime,
+ descriptor.EndTime,
+ descriptor.Metadata,
+ descriptor.Closed,
+ state,
+ manager,
+ this);
+ }
+
+ private IDurableValueCommandCodec CreateOperationCodec()
+ {
+ var journalFormatKey = _journaledStateManagerOptions.JournalFormatKey;
+ if (string.IsNullOrWhiteSpace(journalFormatKey))
+ {
+ throw new InvalidOperationException("The configured journal format key must be non-empty.");
+ }
+
+ var codec = _serviceProvider.GetKeyedService>(journalFormatKey);
+ return codec ?? throw new InvalidOperationException(
+ $"Journal format key '{journalFormatKey}' requires keyed service '{typeof(IDurableValueCommandCodec).FullName}', but none was registered.");
+ }
+
+ private async ValueTask GetDescriptorAsync(string shardId, CancellationToken cancellationToken)
+ {
+ try
+ {
+ return await GetDescriptorAsync(JobShardId.Parse(shardId).ToJournalId(), cancellationToken);
+ }
+ catch (ArgumentException)
+ {
+ return null;
+ }
+ }
+
+ private async ValueTask GetDescriptorAsync(JournalId storageId, CancellationToken cancellationToken)
+ {
+ var properties = await _storageProvider.CreateStorage(storageId).GetMetadataAsync(cancellationToken);
+ return properties is null ? null : ShardCatalogProperties.From(storageId, properties);
+ }
+
+ private async ValueTask UpdateMetadataAsync(
+ ShardCatalogProperties descriptor,
+ IReadOnlyDictionary? set,
+ IEnumerable? remove,
+ CancellationToken cancellationToken)
+ {
+ var storage = _storageProvider.CreateStorage(descriptor.StorageId);
+ return await storage.UpdateMetadataAsync(set, remove, descriptor.Properties.ETag, cancellationToken);
+ }
+
+ private Dictionary CreateInitialProperties(DateTimeOffset minDueTime, DateTimeOffset maxDueTime, IDictionary? metadata)
+ {
+ var result = new Dictionary(StringComparer.Ordinal)
+ {
+ [OwnerProperty] = SiloAddress.ToParsableString(),
+ [MembershipVersionProperty] = GetMembershipVersionString(),
+ [MinDueTimeProperty] = minDueTime.ToString("O", CultureInfo.InvariantCulture),
+ [MaxDueTimeProperty] = maxDueTime.ToString("O", CultureInfo.InvariantCulture),
+ [AdoptedCountProperty] = "0",
+ [ClosedProperty] = bool.FalseString
+ };
+
+ if (metadata is not null)
+ {
+ foreach (var (key, value) in metadata)
+ {
+ result[MetadataPropertyPrefix + EncodeMetadataKey(key)] = value;
+ }
+ }
+
+ return result;
+ }
+
+ private string GetMembershipVersionString() => _membershipService.CurrentSnapshot.Version.Value.ToString(CultureInfo.InvariantCulture);
+
+ private static string EncodeMetadataKey(string key)
+ {
+ ArgumentNullException.ThrowIfNull(key);
+ return Convert.ToBase64String(Encoding.UTF8.GetBytes(key)).TrimEnd('=').Replace('+', '-').Replace('/', '_');
+ }
+
+ private static string DecodeMetadataKey(string encoded)
+ {
+ var base64 = encoded.Replace('-', '+').Replace('_', '/');
+ base64 = base64.PadRight(base64.Length + (4 - base64.Length % 4) % 4, '=');
+ return Encoding.UTF8.GetString(Convert.FromBase64String(base64));
+ }
+
+ private sealed class ShardCatalogProperties
+ {
+ private ShardCatalogProperties(
+ JournalId storageId,
+ JobShardId shardId,
+ IJournalMetadata properties,
+ SiloAddress? owner,
+ MembershipVersion membershipVersion,
+ DateTimeOffset startTime,
+ DateTimeOffset endTime,
+ int adoptedCount,
+ bool poisoned,
+ bool closed,
+ IReadOnlyDictionary metadata)
+ {
+ StorageId = storageId;
+ ShardId = shardId;
+ Properties = properties;
+ Owner = owner;
+ MembershipVersion = membershipVersion;
+ StartTime = startTime;
+ EndTime = endTime;
+ AdoptedCount = adoptedCount;
+ Poisoned = poisoned;
+ Closed = closed;
+ Metadata = metadata;
+ }
+
+ public JournalId StorageId { get; }
+
+ public JobShardId ShardId { get; }
+
+ public IJournalMetadata Properties { get; }
+
+ public SiloAddress? Owner { get; }
+
+ public MembershipVersion MembershipVersion { get; }
+
+ public DateTimeOffset StartTime { get; }
+
+ public DateTimeOffset EndTime { get; }
+
+ public int AdoptedCount { get; }
+
+ public bool Poisoned { get; }
+
+ public bool Closed { get; }
+
+ public IReadOnlyDictionary Metadata { get; }
+
+ public static ShardCatalogProperties? From(JournalId storageId, IJournalMetadata properties)
+ {
+ try
+ {
+ var values = properties.Properties;
+ if (!values.TryGetValue(MinDueTimeProperty, out var minDueTimeValue)
+ || !DateTimeOffset.TryParse(minDueTimeValue, CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind, out var minDueTime)
+ || !values.TryGetValue(MaxDueTimeProperty, out var maxDueTimeValue)
+ || !DateTimeOffset.TryParse(maxDueTimeValue, CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind, out var maxDueTime))
+ {
+ return null;
+ }
+
+ var owner = values.TryGetValue(OwnerProperty, out var ownerValue) && !string.IsNullOrWhiteSpace(ownerValue)
+ ? SiloAddress.FromParsableString(ownerValue)
+ : null;
+
+ var membershipVersion = values.TryGetValue(MembershipVersionProperty, out var membershipVersionValue)
+ && long.TryParse(membershipVersionValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedMembershipVersion)
+ ? new MembershipVersion(parsedMembershipVersion)
+ : MembershipVersion.MinValue;
+
+ var adoptedCount = values.TryGetValue(AdoptedCountProperty, out var adoptedCountValue)
+ && int.TryParse(adoptedCountValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedAdoptedCount)
+ ? parsedAdoptedCount
+ : 0;
+
+ var poisoned = values.TryGetValue(PoisonedProperty, out var poisonedValue)
+ && bool.TryParse(poisonedValue, out var parsedPoisoned)
+ && parsedPoisoned;
+
+ var closed = values.TryGetValue(ClosedProperty, out var closedValue)
+ && bool.TryParse(closedValue, out var parsedClosed)
+ && parsedClosed;
+
+ var metadata = new Dictionary(StringComparer.Ordinal);
+ foreach (var (key, value) in values)
+ {
+ if (key.StartsWith(MetadataPropertyPrefix, StringComparison.Ordinal))
+ {
+ metadata[DecodeMetadataKey(key[MetadataPropertyPrefix.Length..])] = value;
+ }
+ }
+
+ var shardId = JobShardId.FromJournalId(storageId);
+ return new(storageId, shardId, properties, owner, membershipVersion, minDueTime, maxDueTime, adoptedCount, poisoned, closed, metadata);
+ }
+ catch (Exception exception) when (exception is ArgumentException or FormatException)
+ {
+ return null;
+ }
+ }
+ }
+}
diff --git a/src/Orleans.DurableJobs/JournaledJobShardState.cs b/src/Orleans.DurableJobs/JournaledJobShardState.cs
new file mode 100644
index 00000000000..5a19f9f3b3e
--- /dev/null
+++ b/src/Orleans.DurableJobs/JournaledJobShardState.cs
@@ -0,0 +1,351 @@
+using System;
+using System.Linq;
+using Orleans.Journaling;
+
+namespace Orleans.DurableJobs;
+
+internal sealed class JournaledJobShardState : IJournaledState, IDurableValueCommandHandler
+{
+ public const string StateName = "jobs";
+
+ private readonly JobShardId _shardId;
+ private readonly IDurableValueCommandCodec? _codec;
+ private readonly TimeProvider _timeProvider;
+ private InMemoryJobQueue _jobQueue;
+ private JournalStreamWriter _writer;
+
+ public JournaledJobShardState(
+ JobShardId shardId,
+ DateTimeOffset startTime,
+ DateTimeOffset endTime,
+ IDurableValueCommandCodec codec,
+ TimeProvider? timeProvider = null)
+ : this(shardId, startTime, endTime, codec, timeProvider, isAddingCompleted: false)
+ {
+ ArgumentNullException.ThrowIfNull(codec);
+ }
+
+ internal JournaledJobShardState(JobShardId shardId, DateTimeOffset startTime, DateTimeOffset endTime, TimeProvider? timeProvider = null)
+ : this(shardId, startTime, endTime, codec: null, timeProvider: timeProvider, isAddingCompleted: false)
+ {
+ }
+
+ private JournaledJobShardState(
+ JobShardId shardId,
+ DateTimeOffset startTime,
+ DateTimeOffset endTime,
+ IDurableValueCommandCodec? codec,
+ TimeProvider? timeProvider,
+ bool isAddingCompleted)
+ {
+ if (endTime < startTime)
+ {
+ throw new ArgumentOutOfRangeException(nameof(endTime), "Shard end time must be greater than or equal to the start time.");
+ }
+
+ _shardId = shardId;
+ _codec = codec;
+ _timeProvider = timeProvider ?? TimeProvider.System;
+ _jobQueue = new(_timeProvider);
+ StartTime = startTime;
+ EndTime = endTime;
+ IsAddingCompleted = isAddingCompleted;
+ }
+
+ public string Id => _shardId.Value;
+
+ public DateTimeOffset StartTime { get; }
+
+ public DateTimeOffset EndTime { get; }
+
+ public bool IsAddingCompleted { get; private set; }
+
+ public int Count => _jobQueue.Count;
+
+ public IAsyncEnumerable ConsumeDurableJobsAsync() => _jobQueue;
+
+ public DurableJob? TryScheduleJob(ScheduleJobRequest request)
+ {
+ if (IsAddingCompleted)
+ {
+ return null;
+ }
+
+ if (request.DueTime < StartTime || request.DueTime > EndTime)
+ {
+ throw new ArgumentOutOfRangeException(nameof(request), "Scheduled time is out of shard bounds.");
+ }
+
+ var job = new DurableJob
+ {
+ Id = Guid.NewGuid().ToString(),
+ TargetGrainId = request.Target,
+ Name = request.JobName,
+ DueTime = request.DueTime,
+ ShardId = Id,
+ Metadata = request.Metadata
+ };
+
+ Write(DurableJobShardJournalRecord.ForSchedule(job));
+ ApplySchedule(job);
+ return job;
+ }
+
+ public bool RemoveJob(string jobId)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
+
+ Write(DurableJobShardJournalRecord.ForRemove(jobId));
+ return ApplyRemove(jobId);
+ }
+
+ public bool RetryJobLater(IJobRunContext jobContext, DateTimeOffset newDueTime)
+ {
+ ArgumentNullException.ThrowIfNull(jobContext);
+ return RetryJobLater(jobContext.Job.Id, newDueTime, jobContext.DequeueCount);
+ }
+
+ public bool RetryJobLater(string jobId, DateTimeOffset newDueTime, int dequeueCount)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
+ ValidateDequeueCount(dequeueCount);
+
+ Write(DurableJobShardJournalRecord.ForRetry(jobId, newDueTime, dequeueCount));
+ return ApplyRetry(jobId, newDueTime, dequeueCount);
+ }
+
+ public void MarkAsComplete()
+ {
+ IsAddingCompleted = true;
+ _jobQueue.MarkAsComplete();
+ }
+
+ internal DurableJobShardSnapshot CaptureSnapshot()
+ {
+ var jobs = _jobQueue.GetSnapshot()
+ .OrderBy(static item => item.Job.DueTime)
+ .ThenBy(static item => item.Job.Id, StringComparer.Ordinal)
+ .Select(static item => new DurableJobShardSnapshotEntry
+ {
+ Job = item.Job,
+ DequeueCount = item.DequeueCount
+ })
+ .ToList();
+
+ return new() { Jobs = jobs };
+ }
+
+ internal void Apply(DurableJobShardJournalRecord record)
+ {
+ ArgumentNullException.ThrowIfNull(record);
+
+ switch (record.Kind)
+ {
+ case DurableJobShardJournalRecordKind.Schedule:
+ ApplySchedule(GetRequired(record.Schedule, nameof(record.Schedule)).Job);
+ break;
+ case DurableJobShardJournalRecordKind.Remove:
+ ApplyRemove(GetRequired(record.Remove, nameof(record.Remove)).JobId);
+ break;
+ case DurableJobShardJournalRecordKind.Retry:
+ var retry = GetRequired(record.Retry, nameof(record.Retry));
+ ApplyRetry(retry.JobId, retry.DueTime, retry.DequeueCount);
+ break;
+ case DurableJobShardJournalRecordKind.Snapshot:
+ ApplySnapshot(GetRequired(record.Snapshot, nameof(record.Snapshot)));
+ break;
+ default:
+ throw new NotSupportedException($"DurableJobs shard journal record kind '{record.Kind}' is not supported.");
+ }
+ }
+
+ void IJournaledState.ReplayEntry(JournalEntry entry, JournalReplayContext context) =>
+ context.GetRequiredCommandCodec(entry.FormatKey, GetCodec()).Apply(entry.Reader, this);
+
+ void IDurableValueCommandHandler.ApplySet(DurableJobShardJournalRecord value) => Apply(value);
+
+ void IJournaledState.Reset(JournalStreamWriter writer)
+ {
+ _jobQueue = new(_timeProvider);
+ IsAddingCompleted = false;
+ _writer = writer;
+ }
+
+ void IJournaledState.AppendEntries(JournalStreamWriter writer)
+ {
+ }
+
+ void IJournaledState.AppendSnapshot(JournalStreamWriter writer)
+ {
+ GetCodec().WriteSet(DurableJobShardJournalRecord.ForSnapshot(CaptureSnapshot()), writer);
+ }
+
+ IJournaledState IJournaledState.DeepCopy() => throw new NotSupportedException();
+
+ private void Write(DurableJobShardJournalRecord record) => GetCodec().WriteSet(record, _writer);
+
+ private void ApplySchedule(DurableJob job) => _jobQueue.Enqueue(job, dequeueCount: 0);
+
+ private bool ApplyRemove(string jobId) => _jobQueue.CancelJob(jobId);
+
+ private bool ApplyRetry(string jobId, DateTimeOffset dueTime, int dequeueCount)
+ {
+ ValidateDequeueCount(dequeueCount);
+ return _jobQueue.RetryJobLater(jobId, dueTime, dequeueCount);
+ }
+
+ private void ApplySnapshot(DurableJobShardSnapshot snapshot)
+ {
+ ArgumentNullException.ThrowIfNull(snapshot);
+
+ _jobQueue.Clear();
+ foreach (var entry in snapshot.Jobs)
+ {
+ ArgumentNullException.ThrowIfNull(entry.Job);
+ ValidateDequeueCount(entry.DequeueCount);
+ _jobQueue.Enqueue(entry.Job, entry.DequeueCount);
+ }
+ }
+
+ private IDurableValueCommandCodec GetCodec()
+ => _codec ?? throw new InvalidOperationException("A DurableJobs shard journal operation codec is required before journal entries can be appended.");
+
+ private static T GetRequired(T? value, string propertyName) where T : class
+ => value ?? throw new InvalidOperationException($"DurableJobs shard journal record is missing required '{propertyName}' payload.");
+
+ private static void ValidateDequeueCount(int dequeueCount)
+ {
+ if (dequeueCount < 0)
+ {
+ throw new InvalidOperationException("DurableJobs shard journal dequeue count must not be negative.");
+ }
+ }
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardJournalRecordKind")]
+internal enum DurableJobShardJournalRecordKind : byte
+{
+ Schedule = 0,
+ Remove = 1,
+ Retry = 2,
+ Snapshot = 3
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardJournalRecord")]
+internal sealed class DurableJobShardJournalRecord
+{
+ [Id(0)]
+ public DurableJobShardJournalRecordKind Kind { get; init; }
+
+ [Id(1)]
+ public DurableJobShardScheduleOperation? Schedule { get; init; }
+
+ [Id(2)]
+ public DurableJobShardRemoveOperation? Remove { get; init; }
+
+ [Id(3)]
+ public DurableJobShardRetryOperation? Retry { get; init; }
+
+ [Id(4)]
+ public DurableJobShardSnapshot? Snapshot { get; init; }
+
+ public static DurableJobShardJournalRecord ForSchedule(DurableJob job)
+ {
+ ArgumentNullException.ThrowIfNull(job);
+
+ return new()
+ {
+ Kind = DurableJobShardJournalRecordKind.Schedule,
+ Schedule = new() { Job = job }
+ };
+ }
+
+ public static DurableJobShardJournalRecord ForRemove(string jobId)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
+
+ return new()
+ {
+ Kind = DurableJobShardJournalRecordKind.Remove,
+ Remove = new() { JobId = jobId }
+ };
+ }
+
+ public static DurableJobShardJournalRecord ForRetry(string jobId, DateTimeOffset dueTime, int dequeueCount)
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(jobId);
+
+ return new()
+ {
+ Kind = DurableJobShardJournalRecordKind.Retry,
+ Retry = new()
+ {
+ JobId = jobId,
+ DueTime = dueTime,
+ DequeueCount = dequeueCount
+ }
+ };
+ }
+
+ public static DurableJobShardJournalRecord ForSnapshot(DurableJobShardSnapshot snapshot)
+ {
+ ArgumentNullException.ThrowIfNull(snapshot);
+
+ return new()
+ {
+ Kind = DurableJobShardJournalRecordKind.Snapshot,
+ Snapshot = snapshot
+ };
+ }
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardScheduleOperation")]
+internal sealed class DurableJobShardScheduleOperation
+{
+ [Id(0)]
+ public DurableJob Job { get; init; } = null!;
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardRemoveOperation")]
+internal sealed class DurableJobShardRemoveOperation
+{
+ [Id(0)]
+ public string JobId { get; init; } = string.Empty;
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardRetryOperation")]
+internal sealed class DurableJobShardRetryOperation
+{
+ [Id(0)]
+ public string JobId { get; init; } = string.Empty;
+
+ [Id(1)]
+ public DateTimeOffset DueTime { get; init; }
+
+ [Id(2)]
+ public int DequeueCount { get; init; }
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardSnapshot")]
+internal sealed class DurableJobShardSnapshot
+{
+ [Id(0)]
+ public List Jobs { get; init; } = [];
+}
+
+[GenerateSerializer]
+[Alias("Orleans.DurableJobs.DurableJobShardSnapshotEntry")]
+internal sealed class DurableJobShardSnapshotEntry
+{
+ [Id(0)]
+ public DurableJob Job { get; init; } = null!;
+
+ [Id(1)]
+ public int DequeueCount { get; init; }
+}
diff --git a/src/Orleans.DurableJobs/LocalDurableJobManager.cs b/src/Orleans.DurableJobs/LocalDurableJobManager.cs
index 51e50030857..c5801c99121 100644
--- a/src/Orleans.DurableJobs/LocalDurableJobManager.cs
+++ b/src/Orleans.DurableJobs/LocalDurableJobManager.cs
@@ -16,10 +16,13 @@
namespace Orleans.DurableJobs;
///
-internal partial class LocalDurableJobManager : SystemTarget, ILocalDurableJobManager, ILifecycleParticipant
+internal partial class LocalDurableJobManager : SystemTarget, ILocalDurableJobManager, ILocalDurableJobManagerSystemTarget, ILifecycleParticipant
{
+ internal static readonly GrainType JobManagerGrainType = SystemTargetGrainId.CreateGrainType("job-manager");
+
private readonly JobShardManager _shardManager;
private readonly ShardExecutor _shardExecutor;
+ private readonly IInternalGrainFactory _grainFactory;
private readonly IAsyncEnumerable _clusterMembershipUpdates;
private readonly IOverloadDetector _overloadDetector;
private readonly TimeProvider _timeProvider;
@@ -45,16 +48,18 @@ internal partial class LocalDurableJobManager : SystemTarget, ILocalDurableJobMa
public LocalDurableJobManager(
JobShardManager shardManager,
ShardExecutor shardExecutor,
+ IInternalGrainFactory grainFactory,
IClusterMembershipService clusterMembership,
IOverloadDetector overloadDetector,
TimeProvider timeProvider,
IOptions options,
SystemTargetShared shared,
ILogger logger)
- : base(SystemTargetGrainId.CreateGrainType("job-manager"), shared)
+ : base(JobManagerGrainType, shared)
{
_shardManager = shardManager;
_shardExecutor = shardExecutor;
+ _grainFactory = grainFactory;
_clusterMembershipUpdates = clusterMembership.MembershipUpdates;
_overloadDetector = overloadDetector;
_timeProvider = timeProvider;
@@ -177,16 +182,35 @@ public async Task TryCancelDurableJobAsync(DurableJob job, CancellationTok
{
LogCancellingJob(_logger, job.Id, job.Name, job.ShardId);
- if (!_shardCache.TryGetValue(job.ShardId, out var shard))
+ if (_shardCache.TryGetValue(job.ShardId, out var shard)) // Shard is cached locally: cancel here, but only after re-checking ownership.
+ {
+ if (!await _shardManager.IsShardOwnedByLocalSiloAsync(job.ShardId, cancellationToken))
+ {
+ LogJobCancellationFailed(_logger, job.Id, job.Name, job.ShardId);
+ return false;
+ }
+
+ using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, _cts.Token); // Removal is cancelled by either the caller's token or _cts.
+ var wasRemoved = await shard.RemoveJobAsync(job.Id, linkedCts.Token);
+ LogJobCancelled(_logger, job.Id, job.Name, job.ShardId);
+ return wasRemoved;
+ }
+
+ var owner = await _shardManager.GetShardOwnerAsync(job.ShardId, cancellationToken);
+ if (owner is null || owner.Equals(Silo)) // No owner reported, or the owner equals Silo although the shard is not cached here: there is nowhere to route the request.
{
LogJobCancellationFailed(_logger, job.Id, job.Name, job.ShardId);
return false;
}
- using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, _cts.Token);
- var wasRemoved = await shard.RemoveJobAsync(job.Id, linkedCts.Token);
- LogJobCancelled(_logger, job.Id, job.Name, job.ShardId);
- return wasRemoved;
+ var remote = _grainFactory.GetSystemTarget<ILocalDurableJobManagerSystemTarget>(JobManagerGrainType, owner); // Interface type argument restored (it was stripped); targets the job manager on the owning silo.
+ var routed = await remote.TryCancelDurableJobAsync(job, cancellationToken);
+ if (!routed)
+ {
+ LogJobCancellationFailed(_logger, job.Id, job.Name, job.ShardId);
+ }
+
+ return routed;
}
private async Task ProcessMembershipUpdates()
@@ -231,7 +255,7 @@ private async Task PeriodicShardCheck()
{
await Task.CompletedTask.ConfigureAwait(ConfigureAwaitOptions.ForceYielding | ConfigureAwaitOptions.ContinueOnCapturedContext);
- using var timer = new PeriodicTimer(TimeSpan.FromMinutes(10));
+ using var timer = new PeriodicTimer(TimeSpan.FromMinutes(10), _timeProvider);
Task timerTask = Task.CompletedTask;
while (!_cts.Token.IsCancellationRequested)
@@ -247,53 +271,7 @@ private async Task PeriodicShardCheck()
var signalTask = _shardCheckSignal.WaitAsync(_cts.Token);
await Task.WhenAny(timerTask, signalTask);
- LogCheckingPendingShards(_logger);
-
- // Clean up old writable shards that have passed their time window
- var now = DateTimeOffset.UtcNow;
- foreach (var key in _writeableShards.Keys.ToArray())
- {
- var shardEndTime = key.Add(_options.ShardDuration);
- if (shardEndTime < now)
- {
- _writeableShards.TryRemove(key, out _);
- }
- }
-
- // Compute the slow-start budget for this cycle
- var budget = ComputeClaimBudget();
-
- // Query ShardManager for assigned shards (source of truth)
- var shards = await _shardManager.AssignJobShardsAsync(DateTime.UtcNow.AddHours(1), budget, _cts.Token);
-
- // Count newly claimed shards (those not already in our cache)
- var newClaimsThisCycle = 0;
- if (shards.Count > 0)
- {
- LogAssignedShards(_logger, shards.Count);
- foreach (var shard in shards)
- {
- if (_shardCache.TryAdd(shard.Id, shard))
- {
- newClaimsThisCycle++;
- }
-
- if (!_runningShards.ContainsKey(shard.Id))
- {
- TryActivateShard(shard);
- }
- }
- }
- else
- {
- LogNoShardsToAssign(_logger);
- }
-
- if (newClaimsThisCycle > 0)
- {
- _totalClaimedShards += newClaimsThisCycle;
- LogOrphanedShardsClaimed(_logger, newClaimsThisCycle, _totalClaimedShards);
- }
+ await ProcessShardCheckCycleAsync(_cts.Token);
}
catch (OperationCanceledException)
{
@@ -307,6 +285,57 @@ private async Task PeriodicShardCheck()
}
}
+ internal async Task ProcessShardCheckCycleAsync(CancellationToken cancellationToken) // Runs one shard-check cycle: expire old writable shards, then cache and activate assigned shards.
+ {
+ LogCheckingPendingShards(_logger);
+
+ // Clean up old writable shards that have passed their time window (key + ShardDuration is earlier than now).
+ var now = _timeProvider.GetUtcNow(); // One timestamp from the injected TimeProvider, reused for the whole cycle.
+ foreach (var key in _writeableShards.Keys.ToArray()) // Iterate over a copy of the keys because entries are removed inside the loop.
+ {
+ var shardEndTime = key.Add(_options.ShardDuration);
+ if (shardEndTime < now && _writeableShards.TryRemove(key, out var expiredShard))
+ {
+ await expiredShard.MarkAsCompleteAsync(cancellationToken); // Reached only when this call actually removed the entry.
+ }
+ }
+
+ // Compute the slow-start budget for this cycle.
+ var budget = ComputeClaimBudget();
+
+ // Query ShardManager for assigned shards (source of truth).
+ var shards = await _shardManager.AssignJobShardsAsync(now.AddHours(1), budget, cancellationToken); // NOTE(review): the one-hour offset is presumably the look-ahead bound for assignment — confirm against AssignJobShardsAsync.
+
+ // Count newly claimed shards (those not already in our cache).
+ var newClaimsThisCycle = 0;
+ if (shards.Count > 0)
+ {
+ LogAssignedShards(_logger, shards.Count);
+ foreach (var shard in shards)
+ {
+ if (_shardCache.TryAdd(shard.Id, shard)) // TryAdd succeeds only for shard ids not yet cached.
+ {
+ newClaimsThisCycle++;
+ }
+
+ if (!_runningShards.ContainsKey(shard.Id)) // Shards that already have a running entry are not activated again.
+ {
+ TryActivateShard(shard);
+ }
+ }
+ }
+ else
+ {
+ LogNoShardsToAssign(_logger);
+ }
+
+ if (newClaimsThisCycle > 0)
+ {
+ _totalClaimedShards += newClaimsThisCycle; // Running total across cycles; it is passed to the log call below.
+ LogOrphanedShardsClaimed(_logger, newClaimsThisCycle, _totalClaimedShards);
+ }
+ }
+
///
/// Computes the maximum number of orphaned shards this silo may claim in the current check cycle.
/// Returns when unlimited (ramp-up complete or disabled).
@@ -426,7 +455,26 @@ private async Task RunShardWithCleanupAsync(IJobShard shard)
private bool ShouldStartShardNow(IJobShard shard)
{
var activationTime = shard.StartTime.Subtract(_options.ShardActivationBufferPeriod);
- return DateTimeOffset.UtcNow >= activationTime;
+ return _timeProvider.GetUtcNow() >= activationTime;
+ }
+
+ internal sealed class TestAccessor(LocalDurableJobManager manager) // Exposes selected private state and operations of the manager; by its name, intended for tests.
+ {
+ public Task ProcessShardCheckCycleAsync(CancellationToken cancellationToken) => manager.ProcessShardCheckCycleAsync(cancellationToken); // Runs a single shard-check cycle directly.
+
+ public void AddWritableShard(DateTimeOffset shardKey, IJobShard shard)
+ {
+ manager._writeableShards[shardKey] = shard; // Overwrites any shard already stored under this key.
+ manager._shardCache.TryAdd(shard.Id, shard); // Leaves an existing cache entry for the same id untouched.
+ }
+
+ public bool HasWritableShard(DateTimeOffset shardKey) => manager._writeableShards.ContainsKey(shardKey); // True while the key is present in the writable-shard map.
+
+ public void TryActivateShard(IJobShard shard) => manager.TryActivateShard(shard); // Forwards to the manager's private activation method.
+
+ public bool TryGetRunningShardTask(string shardId, out Task? task) => manager._runningShards.TryGetValue(shardId, out task); // Looks up the running-shard entry stored for this id.
+
+ public bool HasCachedShard(string shardId) => manager._shardCache.ContainsKey(shardId); // True while the shard id is present in the shard cache.
}
private DateTimeOffset GetShardKey(DateTimeOffset scheduledTime)
diff --git a/src/Orleans.DurableJobs/Orleans.DurableJobs.csproj b/src/Orleans.DurableJobs/Orleans.DurableJobs.csproj
index ba79c4e09ad..e838ed729a7 100644
--- a/src/Orleans.DurableJobs/Orleans.DurableJobs.csproj
+++ b/src/Orleans.DurableJobs/Orleans.DurableJobs.csproj
@@ -11,11 +11,13 @@
$(VersionSuffix).alpha.1
alpha.1
enable
+ $(NoWarn);ORLEANSEXP005
+
@@ -27,6 +29,7 @@
+
diff --git a/src/Orleans.DurableJobs/README.md b/src/Orleans.DurableJobs/README.md
index 5738e1cd057..949bd1f9393 100644
--- a/src/Orleans.DurableJobs/README.md
+++ b/src/Orleans.DurableJobs/README.md
@@ -58,13 +58,10 @@ builder.UseOrleans(siloBuilder =>
siloBuilder
.UseLocalhostClustering()
// Configure Azure Storage Durable Jobs
- .UseAzureStorageDurableJobs(options =>
+ .UseAzureBlobDurableJobs(options =>
{
- options.Configure(o =>
- {
- o.BlobServiceClient = new Azure.Storage.Blobs.BlobServiceClient("YOUR_CONNECTION_STRING");
- o.ContainerName = "durable-jobs";
- });
+ options.BlobServiceClient = new Azure.Storage.Blobs.BlobServiceClient("YOUR_CONNECTION_STRING");
+ options.ContainerName = "durable-jobs";
});
});
diff --git a/src/Orleans.DurableJobs/ShardExecutor.cs b/src/Orleans.DurableJobs/ShardExecutor.cs
index 0c98e2c599b..6d4d5082e52 100644
--- a/src/Orleans.DurableJobs/ShardExecutor.cs
+++ b/src/Orleans.DurableJobs/ShardExecutor.cs
@@ -19,6 +19,7 @@ internal sealed partial class ShardExecutor
private readonly IInternalGrainFactory _grainFactory;
private readonly ILogger _logger;
private readonly DurableJobsOptions _options;
+ private readonly TimeProvider _timeProvider;
private readonly SemaphoreSlim _jobConcurrencyLimiter;
private readonly IOverloadDetector _overloadDetector;
private int _currentCapacity;
@@ -35,12 +36,14 @@ public ShardExecutor(
IInternalGrainFactory grainFactory,
IOptions options,
IOverloadDetector overloadDetector,
- ILogger logger)
+ ILogger logger,
+ TimeProvider? timeProvider = null)
{
_grainFactory = grainFactory;
_logger = logger;
_options = options.Value;
_overloadDetector = overloadDetector;
+ _timeProvider = timeProvider ?? TimeProvider.System;
_currentCapacity = _options.ConcurrencySlowStartEnabled && _options.SlowStartInitialConcurrency < _options.MaxConcurrentJobsPerSilo
? _options.SlowStartInitialConcurrency
@@ -68,12 +71,13 @@ public async Task RunShardAsync(IJobShard shard, CancellationToken cancellationT
var tasks = new ConcurrentDictionary();
try
{
- if (shard.StartTime > DateTime.UtcNow)
+ var now = _timeProvider.GetUtcNow();
+ if (shard.StartTime > now)
{
// Wait until the shard's start time
- var delay = shard.StartTime - DateTimeOffset.UtcNow;
+ var delay = shard.StartTime - now;
LogWaitingForShardStartTime(_logger, shard.Id, delay, shard.StartTime);
- await Task.Delay(delay, cancellationToken);
+ await Task.Delay(delay, _timeProvider, cancellationToken);
}
LogBeginProcessingShard(_logger, shard.Id);
@@ -87,7 +91,7 @@ public async Task RunShardAsync(IJobShard shard, CancellationToken cancellationT
LogOverloadDetected(_logger, shard.Id);
while (_overloadDetector.IsOverloaded)
{
- await Task.Delay(_options.OverloadBackoffDelay, cancellationToken);
+ await Task.Delay(_options.OverloadBackoffDelay, _timeProvider, cancellationToken);
}
LogOverloadCleared(_logger, shard.Id);
}
@@ -121,7 +125,7 @@ private async Task SlowStartRampUpAsync()
{
while (Volatile.Read(ref _currentCapacity) < targetCapacity)
{
- await Task.Delay(_options.SlowStartInterval);
+ await Task.Delay(_options.SlowStartInterval, _timeProvider, CancellationToken.None);
while (true)
{
@@ -191,7 +195,7 @@ private async Task RunJobAsync(
// Enter polling loop
LogPollingJob(_logger, jobContext.Job.Id, jobContext.Job.Name, result.PollAfterDelay.Value);
- await Task.Delay(result.PollAfterDelay.Value, cancellationToken);
+ await Task.Delay(result.PollAfterDelay.Value, _timeProvider, cancellationToken);
result = await target.HandleDurableJobAsync(jobContext, cancellationToken);
}
diff --git a/src/Orleans.Journaling/HostingExtensions.cs b/src/Orleans.Journaling/HostingExtensions.cs
index 1b663a4f80d..4a3832d95f7 100644
--- a/src/Orleans.Journaling/HostingExtensions.cs
+++ b/src/Orleans.Journaling/HostingExtensions.cs
@@ -9,9 +9,9 @@ public static class HostingExtensions
public static ISiloBuilder AddJournalStorage(this ISiloBuilder builder)
{
builder.Services.AddOptions();
- builder.Services.TryAddScoped();
+ builder.Services.TryAddSingleton();
builder.Services.TryAddScoped();
- builder.Services.TryAddScoped();
+ builder.Services.TryAddSingleton();
// Register JSON as the default format family and keep Orleans binary available for existing data.
builder.Services.AddJsonJournalFormat(new JsonJournalOptions().SerializerOptions, tryAdd: true);
diff --git a/src/Orleans.Journaling/IJournalStorage.cs b/src/Orleans.Journaling/IJournalStorage.cs
index 4cedd67fe35..e3808ce6874 100644
--- a/src/Orleans.Journaling/IJournalStorage.cs
+++ b/src/Orleans.Journaling/IJournalStorage.cs
@@ -14,14 +14,58 @@ public interface IJournalStorage
/// Implementations must notify when the read is complete by passing a
/// with set to .
/// Each call must pass metadata describing the journal file being read. If storage has no metadata,
- /// pass or . Metadata passed during one read must have the same
- /// value for every call.
+ /// pass or . Metadata passed during one read must have the same
+ /// value for every call.
///
/// The consumer of ordered raw journal data. Chunk boundaries are not journal-entry boundaries.
/// The cancellation token.
/// A representing the operation.
ValueTask ReadAsync(IJournalStorageConsumer consumer, CancellationToken cancellationToken);
+ /// <summary>
+ /// Creates this journal storage instance if it does not already exist.
+ /// </summary>
+ /// <remarks>
+ /// Initial metadata is only applied when the storage instance is created. If the journal was
+ /// already created by a write, this method returns <see langword="false"/> and does not update metadata.
+ /// </remarks>
+ /// <param name="metadata">Initial caller-owned metadata properties.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns><see langword="true"/> if storage was created; otherwise, <see langword="false"/>.</returns>
+ ValueTask<bool> CreateIfNotExistsAsync(
+ IReadOnlyDictionary<string, string>? metadata = null,
+ CancellationToken cancellationToken = default)
+ => throw new NotSupportedException($"{nameof(IJournalStorage)} implementation does not support journal storage metadata operations."); // Default body: implementations that do not override this member throw.
+
+ /// <summary>
+ /// Gets metadata for this journal storage instance.
+ /// </summary>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>The metadata, or <see langword="null"/> if the storage instance does not exist.</returns>
+ ValueTask<IJournalMetadata?> GetMetadataAsync(CancellationToken cancellationToken = default)
+ => throw new NotSupportedException($"{nameof(IJournalStorage)} implementation does not support journal storage metadata operations."); // Default body: implementations that do not override this member throw.
+
+ /// <summary>
+ /// Conditionally updates caller-owned metadata properties.
+ /// </summary>
+ /// <remarks>
+ /// Implementations apply updates atomically against the current metadata. When
+ /// <paramref name="expectedETag"/> is not <see langword="null"/>, providers which support ETags
+ /// must only apply the update if the current metadata ETag matches it. Provider-owned metadata
+ /// must be preserved.
+ /// </remarks>
+ /// <param name="set">Metadata properties to set.</param>
+ /// <param name="remove">Metadata properties to remove.</param>
+ /// <param name="expectedETag">The expected metadata ETag, or <see langword="null"/> for an unconditional update.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>The current metadata if the update was applied or made no changes; otherwise, <see langword="null"/>.</returns>
+ ValueTask<IJournalMetadata?> UpdateMetadataAsync(
+ IReadOnlyDictionary<string, string>? set = null,
+ IEnumerable<string>? remove = null,
+ string? expectedETag = null,
+ CancellationToken cancellationToken = default)
+ => throw new NotSupportedException($"{nameof(IJournalStorage)} implementation does not support journal storage metadata operations."); // Default body: implementations that do not override this member throw.
+
///
/// Replaces the journal with the provided value atomically.
///
diff --git a/src/Orleans.Journaling/IJournalStorageCatalog.cs b/src/Orleans.Journaling/IJournalStorageCatalog.cs
new file mode 100644
index 00000000000..5c875b0069e
--- /dev/null
+++ b/src/Orleans.Journaling/IJournalStorageCatalog.cs
@@ -0,0 +1,19 @@
+namespace Orleans.Journaling;
+
+/// <summary>
+/// Provides catalog operations for journal storage instances.
+/// </summary>
+/// <remarks>
+/// A catalog only discovers storage identities. Storage lifecycle, metadata, and data mutation
+/// operations remain on <see cref="IJournalStorage"/>.
+/// </remarks>
+public interface IJournalStorageCatalog
+{
+ /// <summary>
+ /// Lists journal ids which match <paramref name="prefix"/>.
+ /// </summary>
+ /// <param name="prefix">The journal id prefix, or the default value to list all ids.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>Matching ids in lexicographic order.</returns>
+ IAsyncEnumerable<JournalId> ListAsync(JournalId prefix = default, CancellationToken cancellationToken = default);
+}
diff --git a/src/Orleans.Journaling/IJournalStorageConsumer.cs b/src/Orleans.Journaling/IJournalStorageConsumer.cs
index 3f4b9e22547..6ce8e500a12 100644
--- a/src/Orleans.Journaling/IJournalStorageConsumer.cs
+++ b/src/Orleans.Journaling/IJournalStorageConsumer.cs
@@ -9,40 +9,109 @@ public interface IJournalStorageConsumer
/// Reads buffered raw journal data.
///
/// The buffered journal data available to the consumer.
- /// The metadata associated with the journal file being read, or if no metadata is available.
- void Read(JournalBufferReader buffer, IJournalFileMetadata? metadata);
+ /// The metadata associated with the journal data being read, or if no metadata is available.
+ void Read(JournalBufferReader buffer, IJournalMetadata? metadata);
}
///
-/// Metadata associated with a journal file being read from storage.
+/// Metadata associated with journal storage.
///
-public interface IJournalFileMetadata
+public interface IJournalMetadata
{
///
/// Gets the journal format key stored with the journal data, or if no key is present.
///
string? Format { get; }
+
+ ///
+ /// Gets the storage metadata ETag, or if none is available.
+ ///
+ string? ETag { get; }
+
+ ///
+ /// Gets caller-owned storage metadata properties.
+ ///
+ IReadOnlyDictionary Properties { get; }
}
///
-/// Default implementation of .
+/// Default implementation of .
///
-public sealed class JournalFileMetadata : IJournalFileMetadata
+public sealed class JournalMetadata : IJournalMetadata
{
///
- /// Gets an empty metadata instance for journal data without storage metadata.
+ /// Gets an empty metadata instance.
///
- public static IJournalFileMetadata Empty { get; } = new JournalFileMetadata(format: null);
+ public static IJournalMetadata Empty { get; } = new JournalMetadata(format: null, eTag: null, properties: null);
///
- /// Initializes a new instance of the class.
+ /// Initializes a new instance of the class.
///
/// The journal format key stored with the journal data, or if no key is present.
- public JournalFileMetadata(string? format)
+ /// The storage metadata ETag, or if none is available.
+ /// Caller-owned storage metadata properties.
+ public JournalMetadata(string? format, string? eTag = null, IReadOnlyDictionary? properties = null)
{
Format = format;
+ ETag = eTag;
+ Properties = CopyProperties(properties);
}
///
public string? Format { get; }
+
+ ///
+ public string? ETag { get; }
+
+ ///
+ public IReadOnlyDictionary Properties { get; }
+
+ internal static Dictionary<string, string> CopyProperties(IReadOnlyDictionary<string, string>? properties) // Returns a validated copy keyed with ordinal comparison; null input yields an empty dictionary.
+ {
+ var result = new Dictionary<string, string>(StringComparer.Ordinal);
+ if (properties is null)
+ {
+ return result;
+ }
+
+ foreach (var (key, value) in properties)
+ {
+ ValidateCallerProperty(key, value); // Throws for invalid or provider-owned names and for null values.
+ result.Add(key, value);
+ }
+
+ return result;
+ }
+
+ internal static void ValidatePropertyName(string propertyName) // Basic name check: non-blank and free of null characters.
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(propertyName); // Rejects null, empty, and whitespace-only names.
+ if (propertyName.IndexOf('\0') >= 0) // Any embedded NUL character makes the name invalid.
+ {
+ throw new ArgumentException("Journal metadata property names must not contain null characters.", nameof(propertyName));
+ }
+ }
+
+ internal static void ValidateCallerPropertyName(string propertyName) // Name check for caller-supplied properties: a valid name that is not provider-owned.
+ {
+ ValidatePropertyName(propertyName); // Basic checks run first, so blank or NUL-containing names fail before the ownership test.
+ if (IsProviderOwned(propertyName)) // Provider-owned names are those starting with '$'.
+ {
+ throw new ArgumentException(
+ $"Journal metadata property '{propertyName}' is provider-owned. Caller updates must not set or remove provider-owned properties.",
+ nameof(propertyName));
+ }
+ }
+
+ internal static void ValidateCallerProperty(string propertyName, string value) // Validates a caller-supplied name/value pair.
+ {
+ ValidateCallerPropertyName(propertyName); // The name is validated before the value.
+ ArgumentNullException.ThrowIfNull(value); // Only null is rejected; empty strings pass this check.
+ }
+
+ internal static bool IsProviderOwned(string propertyName) // Returns true when the name starts with '$' (ordinal comparison).
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(propertyName); // Throws rather than returning false for null, empty, or whitespace-only names.
+ return propertyName.StartsWith("$", StringComparison.Ordinal);
+ }
}
diff --git a/src/Orleans.Journaling/JournalId.cs b/src/Orleans.Journaling/JournalId.cs
index 8af8ffe0f2b..7d06cd00e02 100644
--- a/src/Orleans.Journaling/JournalId.cs
+++ b/src/Orleans.Journaling/JournalId.cs
@@ -5,6 +5,8 @@ namespace Orleans.Journaling;
///
public readonly struct JournalId : IEquatable
{
+ private const char Separator = '/';
+
///
/// Initializes a new instance of the struct.
///
@@ -40,6 +42,78 @@ public static JournalId FromGrainId(GrainId grainId)
return new(grainId.ToString());
}
+ /// <summary>
+ /// Creates a journal id from decoded hierarchical segments.
+ /// </summary>
+ /// <param name="firstSegment">The first id segment.</param>
+ /// <param name="additionalSegments">Additional id segments.</param>
+ /// <returns>The normalized journal id.</returns>
+ public static JournalId Create(string firstSegment, params ReadOnlySpan<string> additionalSegments)
+ {
+ var encodedSegments = new string[additionalSegments.Length + 1]; // One slot for the first segment plus one per additional segment.
+ encodedSegments[0] = EncodeSegment(firstSegment, nameof(firstSegment));
+ for (var i = 0; i < additionalSegments.Length; i++)
+ {
+ encodedSegments[i + 1] = EncodeSegment(additionalSegments[i], nameof(additionalSegments)); // Each segment is validated and escaped on its own.
+ }
+
+ return new(string.Join(Separator, encodedSegments)); // Encoded segments are joined with the '/' separator.
+ }
+
+ /// <summary>
+ /// Creates a journal id from decoded hierarchical segments.
+ /// </summary>
+ /// <param name="segments">The id segments.</param>
+ /// <returns>The normalized journal id.</returns>
+ public static JournalId Create(IEnumerable<string> segments)
+ {
+ ArgumentNullException.ThrowIfNull(segments);
+
+ return Create(segments.ToArray().AsSpan()); // Materializes the sequence once and passes it to Create as a span; an empty sequence is rejected there.
+ }
+
+ /// <summary>
+ /// Creates a journal id from decoded hierarchical segments.
+ /// </summary>
+ /// <param name="segments">The id segments.</param>
+ /// <returns>The normalized journal id.</returns>
+ public static JournalId Create(ReadOnlySpan<string> segments)
+ {
+ if (segments.Length == 0) // An id needs at least one segment.
+ {
+ throw new ArgumentException("A journal id must contain at least one segment.", nameof(segments));
+ }
+
+ var encodedSegments = new string[segments.Length];
+ for (var i = 0; i < segments.Length; i++)
+ {
+ encodedSegments[i] = EncodeSegment(segments[i], nameof(segments)); // Each segment is validated and escaped on its own.
+ }
+
+ return new(string.Join(Separator, encodedSegments)); // Encoded segments are joined with the '/' separator.
+ }
+
+ /// <summary>
+ /// Determines whether this id is a prefix of <paramref name="journalId"/>.
+ /// </summary>
+ /// <param name="journalId">The journal id to test.</param>
+ /// <returns><see langword="true"/> if this id is the default value, equals <paramref name="journalId"/>, or identifies an ancestor segment.</returns>
+ public bool IsPrefixOf(JournalId journalId)
+ {
+ if (IsDefault) // The default id is a prefix of every id, including another default id.
+ {
+ return true;
+ }
+
+ if (journalId.IsDefault) // A non-default id is never a prefix of the default id.
+ {
+ return false;
+ }
+
+ return string.Equals(journalId.Value, Value, StringComparison.Ordinal)
+ || journalId.Value.StartsWith(Value + Separator, StringComparison.Ordinal); // The separator is appended so "a" matches "a/b" but not "ab".
+ }
+
///
public override string ToString() => Value ?? string.Empty;
@@ -67,4 +141,20 @@ public static JournalId FromGrainId(GrainId grainId)
/// The second journal id.
/// if the journal ids are not equal; otherwise, .
public static bool operator !=(JournalId left, JournalId right) => !left.Equals(right);
+
+ private static string EncodeSegment(string segment, string parameterName) // Validates one decoded segment and returns it escaped; parameterName is reported in any exception thrown.
+ {
+ ArgumentException.ThrowIfNullOrWhiteSpace(segment, parameterName); // Rejects null, empty, and whitespace-only segments.
+ if (segment is "." or "..") // Only these two exact values are rejected; other dotted segments pass.
+ {
+ throw new ArgumentException("Journal id segments must not be '.' or '..'.", parameterName);
+ }
+
+ if (segment.IndexOf('\0') >= 0) // Any embedded NUL character makes the segment invalid.
+ {
+ throw new ArgumentException("Journal id segments must not contain null characters.", parameterName);
+ }
+
+ return Uri.EscapeDataString(segment); // Percent-encodes reserved characters, including '/', so a segment cannot contain the separator.
+ }
}
diff --git a/src/Orleans.Journaling/JournalStorageConsumerExtensions.cs b/src/Orleans.Journaling/JournalStorageConsumerExtensions.cs
index 2579931d612..cfce4a2190f 100644
--- a/src/Orleans.Journaling/JournalStorageConsumerExtensions.cs
+++ b/src/Orleans.Journaling/JournalStorageConsumerExtensions.cs
@@ -13,10 +13,10 @@ public static class JournalStorageConsumerExtensions
///
/// The journal storage consumer.
/// The metadata associated with the journal data being read, or if no metadata is available.
- public static void Complete(this IJournalStorageConsumer consumer, IJournalFileMetadata? metadata)
+ public static void Complete(this IJournalStorageConsumer consumer, IJournalMetadata? metadata)
{
ArgumentNullException.ThrowIfNull(consumer);
- metadata ??= JournalFileMetadata.Empty;
+ metadata ??= JournalMetadata.Empty;
using var buffer = new ArcBufferWriter();
ReadBuffer(consumer, buffer, metadata, isCompleted: true);
@@ -29,10 +29,10 @@ public static void Complete(this IJournalStorageConsumer consumer, IJournalFileM
/// The bytes to read.
/// The metadata associated with the journal data being read, or if no metadata is available.
/// Whether to notify the consumer that no more data will be supplied. If , the consumer must read all supplied bytes.
- public static void Read(this IJournalStorageConsumer consumer, ReadOnlyMemory input, IJournalFileMetadata? metadata, bool complete)
+ public static void Read(this IJournalStorageConsumer consumer, ReadOnlyMemory input, IJournalMetadata? metadata, bool complete)
{
ArgumentNullException.ThrowIfNull(consumer);
- metadata ??= JournalFileMetadata.Empty;
+ metadata ??= JournalMetadata.Empty;
using var buffer = new ArcBufferWriter();
if (!input.IsEmpty)
@@ -51,10 +51,10 @@ public static void Read(this IJournalStorageConsumer consumer, ReadOnlyMemoryThe bytes to read.
/// The metadata associated with the journal data being read, or if no metadata is available.
/// Whether to notify the consumer that no more data will be supplied. If , the consumer must read all supplied bytes.
- public static void Read(this IJournalStorageConsumer consumer, ReadOnlySequence input, IJournalFileMetadata? metadata, bool complete)
+ public static void Read(this IJournalStorageConsumer consumer, ReadOnlySequence input, IJournalMetadata? metadata, bool complete)
{
ArgumentNullException.ThrowIfNull(consumer);
- metadata ??= JournalFileMetadata.Empty;
+ metadata ??= JournalMetadata.Empty;
using var buffer = new ArcBufferWriter();
foreach (var segment in input)
@@ -78,11 +78,11 @@ public static void Read(this IJournalStorageConsumer consumer, ReadOnlySequence<
/// The ordered bytes to read.
/// The metadata associated with the journal data being read, or if no metadata is available.
/// Whether to notify the consumer that no more data will be supplied. If , the consumer must read all supplied bytes.
- public static void Read(this IJournalStorageConsumer consumer, IEnumerable> segments, IJournalFileMetadata? metadata, bool complete)
+ public static void Read(this IJournalStorageConsumer consumer, IEnumerable> segments, IJournalMetadata? metadata, bool complete)
{
ArgumentNullException.ThrowIfNull(consumer);
ArgumentNullException.ThrowIfNull(segments);
- metadata ??= JournalFileMetadata.Empty;
+ metadata ??= JournalMetadata.Empty;
using var buffer = new ArcBufferWriter();
foreach (var segment in segments)
@@ -99,7 +99,7 @@ public static void Read(this IJournalStorageConsumer consumer, IEnumerableThe metadata associated with the journal data being read, or if no metadata is available.
/// The cancellation token.
/// The number of bytes read from .
- public static async ValueTask ReadAsync(this IJournalStorageConsumer consumer, Stream input, IJournalFileMetadata? metadata, CancellationToken cancellationToken)
+ public static async ValueTask ReadAsync(this IJournalStorageConsumer consumer, Stream input, IJournalMetadata? metadata, CancellationToken cancellationToken)
=> await consumer.ReadAsync(input, metadata, complete: true, cancellationToken).ConfigureAwait(false);
///
@@ -132,11 +132,11 @@ public static async ValueTask ReadAsync(this IJournalStorageConsumer consu
/// Whether to notify the consumer that no more data will be supplied. If , the consumer must read all supplied bytes.
/// The cancellation token.
/// The number of bytes read from .
- public static async ValueTask ReadAsync(this IJournalStorageConsumer consumer, Stream input, IJournalFileMetadata? metadata, bool complete, CancellationToken cancellationToken)
+ public static async ValueTask ReadAsync(this IJournalStorageConsumer consumer, Stream input, IJournalMetadata? metadata, bool complete, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(consumer);
ArgumentNullException.ThrowIfNull(input);
- metadata ??= JournalFileMetadata.Empty;
+ metadata ??= JournalMetadata.Empty;
using var buffer = new ArcBufferWriter();
long totalBytesRead = 0;
@@ -157,7 +157,7 @@ public static async ValueTask ReadAsync(this IJournalStorageConsumer consu
}
}
- private static void ReadBuffer(IJournalStorageConsumer consumer, ArcBufferWriter buffer, IJournalFileMetadata metadata, bool isCompleted)
+ private static void ReadBuffer(IJournalStorageConsumer consumer, ArcBufferWriter buffer, IJournalMetadata metadata, bool isCompleted)
{
var readBuffer = new JournalBufferReader(buffer.Reader, isCompleted);
consumer.Read(readBuffer, metadata);
diff --git a/src/Orleans.Journaling/JournaledStateManager.cs b/src/Orleans.Journaling/JournaledStateManager.cs
index 348c44cf45d..f845e0de18b 100644
--- a/src/Orleans.Journaling/JournaledStateManager.cs
+++ b/src/Orleans.Journaling/JournaledStateManager.cs
@@ -665,7 +665,7 @@ private IJournalFormat GetJournalFormat(string journalFormatKey)
return JournalFormatServices.GetRequiredJournalFormat(_shared.ServiceProvider, journalFormatKey);
}
- private void ProcessRecoveryBuffer(JournalBufferReader buffer, IJournalFileMetadata? metadata)
+ private void ProcessRecoveryBuffer(JournalBufferReader buffer, IJournalMetadata? metadata)
{
if (buffer.Length == 0)
{
@@ -697,7 +697,7 @@ private void ProcessRecoveryBuffer(JournalBufferReader buffer, IJournalFileMetad
}
}
- void IJournalStorageConsumer.Read(JournalBufferReader buffer, IJournalFileMetadata? metadata) => ProcessRecoveryBuffer(buffer, metadata);
+ void IJournalStorageConsumer.Read(JournalBufferReader buffer, IJournalMetadata? metadata) => ProcessRecoveryBuffer(buffer, metadata);
private static bool ShouldWrapRecoveryFormatException(Exception exception) =>
exception is not OperationCanceledException && !IsRecoveryFormatException(exception);
diff --git a/src/Orleans.Journaling/VolatileJournalStorage.cs b/src/Orleans.Journaling/VolatileJournalStorage.cs
index 3465615f747..1b9899aa0d7 100644
--- a/src/Orleans.Journaling/VolatileJournalStorage.cs
+++ b/src/Orleans.Journaling/VolatileJournalStorage.cs
@@ -1,14 +1,16 @@
using System.Buffers;
using System.Collections.Concurrent;
+using System.Globalization;
+using System.Runtime.CompilerServices;
using Microsoft.Extensions.Options;
using Orleans.Journaling.Json;
namespace Orleans.Journaling;
-public sealed class VolatileJournalStorageProvider : IJournalStorageProvider
+public sealed class VolatileJournalStorageProvider : IJournalStorageProvider, IJournalStorageCatalog
{
private readonly IOptions? _options;
- private readonly ConcurrentDictionary _storage = new();
+ private readonly ConcurrentDictionary _storage = new(StringComparer.Ordinal);
public VolatileJournalStorageProvider()
{
@@ -32,24 +34,72 @@ public IJournalStorage CreateStorage(JournalId journalId)
}
var journalFormatKey = GetJournalFormatKey();
- var storage = _storage.GetOrAdd(journalId, _ => new VolatileJournalStorage(journalFormatKey));
- storage.SetConfiguredJournalFormatKey(journalFormatKey);
- return storage;
+ var store = _storage.GetOrAdd(journalId.Value, static key => new VolatileJournalStorage.Store(key));
+ return new VolatileJournalStorage(store, journalFormatKey);
+ }
+
+ public async IAsyncEnumerable<JournalId> ListAsync( // Yields ids of existing stores matching the prefix, in ordinal order.
+ JournalId prefix = default,
+ [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ List<JournalId> journalIds = []; // Matches are collected first so they can be sorted before being yielded.
+ foreach (var (key, store) in _storage)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ if (!TryParseJournalId(key, out var journalId) || !prefix.IsPrefixOf(journalId)) // Skip keys that do not parse or do not match the prefix.
+ {
+ continue;
+ }
+
+ lock (store.SyncRoot) // Exists is read under the store's lock.
+ {
+ if (!store.Exists) // Stores whose Exists flag is false are not listed.
+ {
+ continue;
+ }
+ }
+
+ journalIds.Add(journalId);
+ }
+
+ journalIds.Sort(static (left, right) => StringComparer.Ordinal.Compare(left.Value, right.Value));
+
+ foreach (var journalId in journalIds)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ yield return journalId;
+ }
+
+ await Task.CompletedTask.ConfigureAwait(false); // No real asynchronous work; presumably present so the async iterator contains an await.
+ }
private string GetJournalFormatKey()
=> JournalFormatServices.ValidateJournalFormatKey(_options?.Value.JournalFormatKey ?? JsonJournalExtensions.JournalFormatKey);
+
+ private static bool TryParseJournalId(string value, out JournalId journalId)
+ {
+ try
+ {
+ journalId = new JournalId(value);
+ return true;
+ }
+ catch (ArgumentException)
+ {
+ journalId = default;
+ return false;
+ }
+ }
}
+
///
/// An in-memory, volatile implementation of for non-durable use cases, such as development and testing.
///
public sealed class VolatileJournalStorage : IJournalStorage
{
- private readonly List _segments = [];
+ private readonly Store _store;
private string? _configuredJournalFormatKey;
- private string? _storedJournalFormatKey;
- public VolatileJournalStorage()
+ public VolatileJournalStorage() : this(new Store(CreateVolatileStorageId()), journalFormatKey: null)
{
}
@@ -57,32 +107,116 @@ public VolatileJournalStorage()
/// Initializes a new instance of the class.
///
/// The journal format key to stamp on writes.
- public VolatileJournalStorage(string? journalFormatKey)
+ public VolatileJournalStorage(string? journalFormatKey) : this(new Store(CreateVolatileStorageId()), journalFormatKey)
+ {
+ }
+
+ internal VolatileJournalStorage(Store store, string? journalFormatKey)
{
+ ArgumentNullException.ThrowIfNull(store);
+ _store = store;
SetConfiguredJournalFormatKey(journalFormatKey);
}
- public bool IsCompactionRequested => _segments.Count > 10;
+ public bool IsCompactionRequested
+ {
+ get
+ {
+ lock (_store.SyncRoot)
+ {
+ return _store.Segments.Count > 10;
+ }
+ }
+ }
- internal IReadOnlyList Segments => _segments;
+ internal IReadOnlyList Segments => _store.Segments;
internal string? StoredJournalFormatKey
- => _storedJournalFormatKey;
+ {
+ get
+ {
+ lock (_store.SyncRoot)
+ {
+ return _store.StoredJournalFormatKey;
+ }
+ }
+
+ set
+ {
+ lock (_store.SyncRoot)
+ {
+ _store.StoredJournalFormatKey = value;
+ }
+ }
+ }
internal void SetConfiguredJournalFormatKey(string? journalFormatKey)
{
_configuredJournalFormatKey = journalFormatKey;
}
+ public ValueTask CreateIfNotExistsAsync(
+ IReadOnlyDictionary? metadata = null,
+ CancellationToken cancellationToken = default)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ var values = JournalMetadata.CopyProperties(metadata);
+ lock (_store.SyncRoot)
+ {
+ if (_store.Exists)
+ {
+ return new(false);
+ }
+
+ _store.Create(values);
+ return new(true);
+ }
+ }
+
+ public ValueTask GetMetadataAsync(CancellationToken cancellationToken = default)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ lock (_store.SyncRoot)
+ {
+ return new(_store.Exists ? _store.GetMetadata() : null);
+ }
+ }
+
+ public ValueTask UpdateMetadataAsync(
+ IReadOnlyDictionary? set = null,
+ IEnumerable? remove = null,
+ string? expectedETag = null,
+ CancellationToken cancellationToken = default)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ var setValues = JournalMetadata.CopyProperties(set);
+ var removeValues = CopyRemove(remove, setValues);
+ lock (_store.SyncRoot)
+ {
+ if (!_store.Exists || expectedETag is not null && !string.Equals(expectedETag, _store.ETag, StringComparison.Ordinal))
+ {
+ return new((IJournalMetadata?)null);
+ }
+
+ _store.ApplyMetadataUpdate(setValues, removeValues);
+ return new(_store.GetMetadata());
+ }
+ }
+
///
public ValueTask ReadAsync(IJournalStorageConsumer consumer, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(consumer);
- var metadata = _storedJournalFormatKey is null
- ? JournalFileMetadata.Empty
- : new JournalFileMetadata(_storedJournalFormatKey);
- consumer.Read(GetSegments(_segments, cancellationToken), metadata, complete: true);
+ byte[][] segments;
+ IJournalMetadata metadata;
+ lock (_store.SyncRoot)
+ {
+ metadata = _store.Exists ? _store.GetMetadata() : JournalMetadata.Empty;
+ segments = _store.Segments.ToArray();
+ }
+
+ consumer.Read(GetSegments(segments, cancellationToken), metadata, complete: true);
return default;
}
@@ -99,8 +233,14 @@ private static IEnumerable> GetSegments(IEnumerable
public ValueTask AppendAsync(ReadOnlySequence segment, CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
- _storedJournalFormatKey = _configuredJournalFormatKey;
- _segments.Add(segment.ToArray());
+ lock (_store.SyncRoot)
+ {
+ _store.Exists = true;
+ _store.StoredJournalFormatKey = _configuredJournalFormatKey;
+ _store.Segments.Add(segment.ToArray());
+ _store.RefreshETag();
+ }
+
return default;
}
@@ -108,17 +248,131 @@ public ValueTask AppendAsync(ReadOnlySequence segment, CancellationToken c
public ValueTask ReplaceAsync(ReadOnlySequence snapshot, CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
- _storedJournalFormatKey = _configuredJournalFormatKey;
- _segments.Clear();
- _segments.Add(snapshot.ToArray());
+ lock (_store.SyncRoot)
+ {
+ _store.Exists = true;
+ _store.StoredJournalFormatKey = _configuredJournalFormatKey;
+ _store.Segments.Clear();
+ _store.Segments.Add(snapshot.ToArray());
+ _store.RefreshETag();
+ }
+
return default;
}
public ValueTask DeleteAsync(CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
- _segments.Clear();
- _storedJournalFormatKey = null;
+ lock (_store.SyncRoot)
+ {
+ _store.Delete();
+ }
+
return default;
}
+
+ private static string CreateVolatileStorageId() => $"volatile/{Guid.NewGuid():N}";
+
+ internal sealed class Store(string storageId)
+ {
+ public object SyncRoot { get; } = new();
+
+ public List Segments { get; } = [];
+
+ public Dictionary Properties { get; } = new(StringComparer.Ordinal);
+
+ public string? StoredJournalFormatKey { get; set; }
+
+ public bool Exists { get; set; }
+
+ public long Version { get; private set; }
+
+ public string? ETag { get; private set; }
+
+ public void Create(IReadOnlyDictionary? properties)
+ {
+ Exists = true;
+ Segments.Clear();
+ Properties.Clear();
+ StoredJournalFormatKey = null;
+ if (properties is not null)
+ {
+ foreach (var (key, value) in properties)
+ {
+ Properties.Add(key, value);
+ }
+ }
+
+ RefreshETag();
+ }
+
+ public void Delete()
+ {
+ Exists = false;
+ Segments.Clear();
+ Properties.Clear();
+ StoredJournalFormatKey = null;
+ ETag = null;
+ Version++;
+ }
+
+ public IJournalMetadata GetMetadata() => new JournalMetadata(StoredJournalFormatKey, ETag, Properties);
+
+ public bool ApplyMetadataUpdate(IReadOnlyDictionary set, IReadOnlySet remove)
+ {
+ var changed = false;
+ foreach (var propertyName in remove)
+ {
+ changed |= Properties.Remove(propertyName);
+ }
+
+ foreach (var (propertyName, value) in set)
+ {
+ if (!Properties.TryGetValue(propertyName, out var currentValue)
+ || !string.Equals(currentValue, value, StringComparison.Ordinal))
+ {
+ Properties[propertyName] = value;
+ changed = true;
+ }
+ }
+
+ if (changed)
+ {
+ RefreshETag();
+ }
+
+ return changed;
+ }
+
+ public string RefreshETag()
+ {
+ Exists = true;
+ ETag = (++Version).ToString("D", CultureInfo.InvariantCulture);
+ return ETag;
+ }
+
+ public override string ToString() => storageId;
+ }
+
+ private static IReadOnlySet CopyRemove(IEnumerable? remove, IReadOnlyDictionary set)
+ {
+ if (remove is null)
+ {
+ return new HashSet(StringComparer.Ordinal);
+ }
+
+ var result = new HashSet(StringComparer.Ordinal);
+ foreach (var key in remove)
+ {
+ JournalMetadata.ValidateCallerPropertyName(key);
+ if (set.ContainsKey(key))
+ {
+ throw new ArgumentException($"Journal metadata property '{key}' cannot be both set and removed.", nameof(remove));
+ }
+
+ result.Add(key);
+ }
+
+ return result;
+ }
}
diff --git a/src/api/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.cs b/src/api/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.cs
index 70e349bab36..a7f71e0c87a 100644
--- a/src/api/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.cs
+++ b/src/api/Azure/Orleans.DurableJobs.AzureStorage/Orleans.DurableJobs.AzureStorage.cs
@@ -6,61 +6,12 @@
// the code is regenerated.
//
//------------------------------------------------------------------------------
-namespace Orleans.DurableJobs.AzureStorage
-{
- public sealed partial class AzureStorageJobShardManager : JobShardManager
- {
- public AzureStorageJobShardManager(Runtime.ILocalSiloDetails localSiloDetails, Microsoft.Extensions.Options.IOptions options, Microsoft.Extensions.Options.IOptions durableJobsOptions, Runtime.IClusterMembershipService clusterMembership, Microsoft.Extensions.Logging.ILoggerFactory loggerFactory) : base(default!) { }
-
- public AzureStorageJobShardManager(Runtime.SiloAddress siloAddress, Azure.Storage.Blobs.BlobServiceClient client, string containerName, string blobPrefix, Hosting.AzureStorageJobShardOptions options, Microsoft.Extensions.Options.IOptions durableJobsOptions, Runtime.IClusterMembershipService clusterMembership, Microsoft.Extensions.Logging.ILoggerFactory loggerFactory) : base(default!) { }
-
- public override System.Threading.Tasks.Task> AssignJobShardsAsync(System.DateTimeOffset maxShardStartTime, int maxNewClaims, System.Threading.CancellationToken cancellationToken) { throw null; }
-
- public override System.Threading.Tasks.Task CreateShardAsync(System.DateTimeOffset minDueTime, System.DateTimeOffset maxDueTime, System.Collections.Generic.IDictionary metadata, System.Threading.CancellationToken cancellationToken) { throw null; }
-
- public override System.Threading.Tasks.Task UnregisterShardAsync(IJobShard shard, System.Threading.CancellationToken cancellationToken) { throw null; }
- }
-
- public static partial class NetstringJsonSerializer
- {
- public static System.Collections.Generic.IAsyncEnumerable DecodeAsync(System.IO.Stream stream, System.Text.Json.Serialization.Metadata.JsonTypeInfo jsonTypeInfo, System.Threading.CancellationToken cancellationToken) { throw null; }
-
- public static void Encode(T value, System.IO.Stream stream, System.Text.Json.Serialization.Metadata.JsonTypeInfo jsonTypeInfo) { }
- }
-}
-
namespace Orleans.Hosting
{
public static partial class AzureStorageDurableJobsExtensions
{
- public static Microsoft.Extensions.DependencyInjection.IServiceCollection UseAzureBlobDurableJobs(this Microsoft.Extensions.DependencyInjection.IServiceCollection services, System.Action> configureOptions) { throw null; }
-
- public static Microsoft.Extensions.DependencyInjection.IServiceCollection UseAzureBlobDurableJobs(this Microsoft.Extensions.DependencyInjection.IServiceCollection services, System.Action configure) { throw null; }
-
- public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, System.Action> configureOptions) { throw null; }
-
- public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, System.Action configure) { throw null; }
- }
-
- public partial class AzureStorageJobShardOptions
- {
- public System.TimeSpan BatchFlushInterval { get { throw null; } set { } }
-
- public Azure.Storage.Blobs.BlobServiceClient BlobServiceClient { get { throw null; } set { } }
-
- public string ContainerName { get { throw null; } set { } }
-
- public int MaxBatchSize { get { throw null; } set { } }
-
- public int MaxBlobCreationRetries { get { throw null; } }
-
- public int MinBatchSize { get { throw null; } set { } }
- }
-
- public partial class AzureStorageJobShardOptionsValidator : IConfigurationValidator
- {
- public AzureStorageJobShardOptionsValidator(AzureStorageJobShardOptions options, string name) { }
+ public static Microsoft.Extensions.DependencyInjection.IServiceCollection UseAzureBlobDurableJobs(this Microsoft.Extensions.DependencyInjection.IServiceCollection services, System.Action configure) { throw null; }
- public void ValidateConfiguration() { }
+ public static ISiloBuilder UseAzureBlobDurableJobs(this ISiloBuilder builder, System.Action configure) { throw null; }
}
}
\ No newline at end of file
diff --git a/src/api/Orleans.Journaling/Orleans.Journaling.cs b/src/api/Orleans.Journaling/Orleans.Journaling.cs
index e9fdd72be71..b491fa2e1fa 100644
--- a/src/api/Orleans.Journaling/Orleans.Journaling.cs
+++ b/src/api/Orleans.Journaling/Orleans.Journaling.cs
@@ -234,11 +234,6 @@ public partial interface IJournaledStateManagerFactory
IJournaledStateManager Create(JournalId journalId);
}
- public partial interface IJournalFileMetadata
- {
- string? Format { get; }
- }
-
public partial interface IJournalFormat
{
string FormatKey { get; }
@@ -249,19 +244,36 @@ public partial interface IJournalFormat
void Replay(JournalBufferReader input, JournalReplayContext context);
}
+ public partial interface IJournalMetadata
+ {
+ string? ETag { get; }
+
+ string? Format { get; }
+
+ System.Collections.Generic.IReadOnlyDictionary Properties { get; }
+ }
+
public partial interface IJournalStorage
{
bool IsCompactionRequested { get; }
System.Threading.Tasks.ValueTask AppendAsync(System.Buffers.ReadOnlySequence value, System.Threading.CancellationToken cancellationToken);
+ System.Threading.Tasks.ValueTask CreateIfNotExistsAsync(System.Collections.Generic.IReadOnlyDictionary? metadata = null, System.Threading.CancellationToken cancellationToken = default);
System.Threading.Tasks.ValueTask DeleteAsync(System.Threading.CancellationToken cancellationToken);
+ System.Threading.Tasks.ValueTask GetMetadataAsync(System.Threading.CancellationToken cancellationToken = default);
System.Threading.Tasks.ValueTask ReadAsync(IJournalStorageConsumer consumer, System.Threading.CancellationToken cancellationToken);
System.Threading.Tasks.ValueTask ReplaceAsync(System.Buffers.ReadOnlySequence value, System.Threading.CancellationToken cancellationToken);
+ System.Threading.Tasks.ValueTask UpdateMetadataAsync(System.Collections.Generic.IReadOnlyDictionary? set = null, System.Collections.Generic.IEnumerable? remove = null, string? expectedETag = null, System.Threading.CancellationToken cancellationToken = default);
+ }
+
+ public partial interface IJournalStorageCatalog
+ {
+ System.Collections.Generic.IAsyncEnumerable ListAsync(JournalId prefix = default, System.Threading.CancellationToken cancellationToken = default);
}
public partial interface IJournalStorageConsumer
{
- void Read(JournalBufferReader buffer, IJournalFileMetadata? metadata);
+ void Read(JournalBufferReader buffer, IJournalMetadata? metadata);
}
public partial interface IJournalStorageProvider
@@ -379,15 +391,6 @@ public void Commit() { }
public void Dispose() { }
}
- public sealed partial class JournalFileMetadata : IJournalFileMetadata
- {
- public JournalFileMetadata(string? format) { }
-
- public static IJournalFileMetadata Empty { get { throw null; } }
-
- public string? Format { get { throw null; } }
- }
-
public readonly partial struct JournalId : System.IEquatable
{
private readonly object _dummy;
@@ -398,6 +401,12 @@ public JournalId(string value) { }
public string Value { get { throw null; } }
+ public static JournalId Create(System.Collections.Generic.IEnumerable segments) { throw null; }
+
+ public static JournalId Create(System.ReadOnlySpan segments) { throw null; }
+
+ public static JournalId Create(string firstSegment, params System.ReadOnlySpan additionalSegments) { throw null; }
+
public readonly bool Equals(JournalId other) { throw null; }
public override readonly bool Equals(object? obj) { throw null; }
@@ -406,6 +415,8 @@ public JournalId(string value) { }
public override readonly int GetHashCode() { throw null; }
+ public readonly bool IsPrefixOf(JournalId journalId) { throw null; }
+
public static bool operator ==(JournalId left, JournalId right) { throw null; }
public static bool operator !=(JournalId left, JournalId right) { throw null; }
@@ -413,6 +424,19 @@ public JournalId(string value) { }
public override readonly string ToString() { throw null; }
}
+ public sealed partial class JournalMetadata : IJournalMetadata
+ {
+ public JournalMetadata(string? format, string? eTag = null, System.Collections.Generic.IReadOnlyDictionary? properties = null) { }
+
+ public static IJournalMetadata Empty { get { throw null; } }
+
+ public string? ETag { get { throw null; } }
+
+ public string? Format { get { throw null; } }
+
+ public System.Collections.Generic.IReadOnlyDictionary Properties { get { throw null; } }
+ }
+
public readonly partial struct JournalReplayContext
{
private readonly object _dummy;
@@ -428,17 +452,17 @@ public readonly partial struct JournalReplayContext
public static partial class JournalStorageConsumerExtensions
{
- public static void Complete(this IJournalStorageConsumer consumer, IJournalFileMetadata? metadata) { }
+ public static void Complete(this IJournalStorageConsumer consumer, IJournalMetadata? metadata) { }
- public static void Read(this IJournalStorageConsumer consumer, System.Buffers.ReadOnlySequence input, IJournalFileMetadata? metadata, bool complete) { }
+ public static void Read(this IJournalStorageConsumer consumer, System.Buffers.ReadOnlySequence input, IJournalMetadata? metadata, bool complete) { }
- public static void Read(this IJournalStorageConsumer consumer, System.Collections.Generic.IEnumerable> segments, IJournalFileMetadata? metadata, bool complete) { }
+ public static void Read(this IJournalStorageConsumer consumer, System.Collections.Generic.IEnumerable> segments, IJournalMetadata? metadata, bool complete) { }
- public static void Read(this IJournalStorageConsumer consumer, System.ReadOnlyMemory input, IJournalFileMetadata? metadata, bool complete) { }
+ public static void Read(this IJournalStorageConsumer consumer, System.ReadOnlyMemory input, IJournalMetadata? metadata, bool complete) { }
- public static System.Threading.Tasks.ValueTask ReadAsync(this IJournalStorageConsumer consumer, System.IO.Stream input, IJournalFileMetadata? metadata, bool complete, System.Threading.CancellationToken cancellationToken) { throw null; }
+ public static System.Threading.Tasks.ValueTask ReadAsync(this IJournalStorageConsumer consumer, System.IO.Stream input, IJournalMetadata? metadata, bool complete, System.Threading.CancellationToken cancellationToken) { throw null; }
- public static System.Threading.Tasks.ValueTask ReadAsync(this IJournalStorageConsumer consumer, System.IO.Stream input, IJournalFileMetadata? metadata, System.Threading.CancellationToken cancellationToken) { throw null; }
+ public static System.Threading.Tasks.ValueTask ReadAsync(this IJournalStorageConsumer consumer, System.IO.Stream input, IJournalMetadata? metadata, System.Threading.CancellationToken cancellationToken) { throw null; }
}
public readonly partial struct JournalStreamId : System.IEquatable
@@ -487,20 +511,28 @@ public VolatileJournalStorage(string? journalFormatKey) { }
public System.Threading.Tasks.ValueTask AppendAsync(System.Buffers.ReadOnlySequence segment, System.Threading.CancellationToken cancellationToken) { throw null; }
+ public System.Threading.Tasks.ValueTask CreateIfNotExistsAsync(System.Collections.Generic.IReadOnlyDictionary? metadata = null, System.Threading.CancellationToken cancellationToken = default) { throw null; }
+
public System.Threading.Tasks.ValueTask DeleteAsync(System.Threading.CancellationToken cancellationToken) { throw null; }
+ public System.Threading.Tasks.ValueTask GetMetadataAsync(System.Threading.CancellationToken cancellationToken = default) { throw null; }
+
public System.Threading.Tasks.ValueTask ReadAsync(IJournalStorageConsumer consumer, System.Threading.CancellationToken cancellationToken) { throw null; }
public System.Threading.Tasks.ValueTask ReplaceAsync(System.Buffers.ReadOnlySequence snapshot, System.Threading.CancellationToken cancellationToken) { throw null; }
+
+ public System.Threading.Tasks.ValueTask UpdateMetadataAsync(System.Collections.Generic.IReadOnlyDictionary? set = null, System.Collections.Generic.IEnumerable? remove = null, string? expectedETag = null, System.Threading.CancellationToken cancellationToken = default) { throw null; }
}
- public sealed partial class VolatileJournalStorageProvider : IJournalStorageProvider
+ public sealed partial class VolatileJournalStorageProvider : IJournalStorageProvider, IJournalStorageCatalog
{
public VolatileJournalStorageProvider() { }
public VolatileJournalStorageProvider(Microsoft.Extensions.Options.IOptions options) { }
public IJournalStorage CreateStorage(JournalId journalId) { throw null; }
+
+ public System.Collections.Generic.IAsyncEnumerable ListAsync(JournalId prefix = default, System.Threading.CancellationToken cancellationToken = default) { throw null; }
}
}
@@ -658,4 +690,4 @@ public void WriteField(ref global::Orleans.Serialization.Buffers.
public sealed partial class Copier_DurableTaskCompletionSourceState : global::Orleans.Serialization.Cloning.ShallowCopier>
{
}
-}
\ No newline at end of file
+}
diff --git a/test/Extensions/Orleans.Azure.Tests/AzureStorageOperationOptionsExtensions.cs b/test/Extensions/Orleans.Azure.Tests/AzureStorageOperationOptionsExtensions.cs
index 9fc2526ecf6..7955815ced7 100644
--- a/test/Extensions/Orleans.Azure.Tests/AzureStorageOperationOptionsExtensions.cs
+++ b/test/Extensions/Orleans.Azure.Tests/AzureStorageOperationOptionsExtensions.cs
@@ -59,20 +59,6 @@ public static Orleans.Configuration.AzureBlobStorageOptions ConfigureTestDefault
return options;
}
- public static AzureStorageJobShardOptions ConfigureTestDefaults(this AzureStorageJobShardOptions options)
- {
- if (TestDefaultConfiguration.UseAadAuthentication)
- {
- options.BlobServiceClient = new(TestDefaultConfiguration.DataBlobUri, TestDefaultConfiguration.TokenCredential);
- }
- else
- {
- options.BlobServiceClient = new(TestDefaultConfiguration.DataConnectionString);
- }
-
- return options;
- }
-
public static Orleans.Configuration.AzureQueueOptions ConfigureTestDefaults(this Orleans.Configuration.AzureQueueOptions options)
{
if (TestDefaultConfiguration.UseAadAuthentication)
diff --git a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardBatchingTests.cs b/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardBatchingTests.cs
deleted file mode 100644
index 63921e9a557..00000000000
--- a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardBatchingTests.cs
+++ /dev/null
@@ -1,328 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Collections.Immutable;
-using System.Linq;
-using System.Net;
-using System.Threading;
-using System.Threading.Tasks;
-using Microsoft.Extensions.Logging.Abstractions;
-using Microsoft.Extensions.Options;
-using Orleans.Hosting;
-using Orleans.Runtime;
-using Orleans.DurableJobs;
-using Orleans.DurableJobs.AzureStorage;
-using Tester.AzureUtils;
-using Xunit;
-
-namespace Tester.AzureUtils.DurableJobs;
-
-///
-/// Azure Storage-specific tests for job shard batching functionality.
-/// These tests verify Azure-specific batching behaviors that don't apply to all providers.
-///
-[TestCategory("DurableJobs")]
-public class AzureStorageJobShardBatchingTests : AzureStorageBasicTests, IAsyncDisposable
-{
- private readonly IDictionary _metadata = new Dictionary
- {
- { "CreatedBy", "UnitTest" },
- { "Purpose", "Testing" }
- };
-
- internal InMemoryClusterMembershipService MembershipService { get; }
-
- internal IOptions StorageOptions { get; }
- internal IOptions DurableJobsOptions { get; }
-
- public AzureStorageJobShardBatchingTests()
- {
- MembershipService = new InMemoryClusterMembershipService();
- StorageOptions = Options.Create(new AzureStorageJobShardOptions());
- DurableJobsOptions = Options.Create(new DurableJobsOptions());
- StorageOptions.Value.ConfigureTestDefaults();
- StorageOptions.Value.ContainerName = "test-batch-container-" + Guid.NewGuid().ToString("N");
- }
-
- public async ValueTask DisposeAsync()
- {
- // Cleanup storage container
- var client = StorageOptions.Value.BlobServiceClient;
- var container = client.GetBlobContainerClient(StorageOptions.Value.ContainerName);
- await container.DeleteIfExistsAsync();
- }
-
- public class TestLocalSiloDetails : ILocalSiloDetails
- {
- public TestLocalSiloDetails(SiloAddress siloAddress)
- {
- SiloAddress = siloAddress;
- }
-
- public string Name => SiloAddress.ToString();
-
- public string ClusterId => "TestCluster";
-
- public string DnsHostName => SiloAddress.ToString();
-
- public SiloAddress SiloAddress { get; }
-
- public SiloAddress GatewayAddress => SiloAddress;
- }
-
- internal AzureStorageJobShardManager CreateManager(SiloAddress siloAddress)
- {
- var localSiloDetails = new TestLocalSiloDetails(siloAddress);
- return new AzureStorageJobShardManager(localSiloDetails, StorageOptions, DurableJobsOptions, MembershipService, NullLoggerFactory.Instance);
- }
-
- internal void SetSiloStatus(SiloAddress siloAddress, SiloStatus status)
- {
- MembershipService.SetSiloStatus(siloAddress, status);
- }
-
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShard_MultipleOperationsBatched()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- var cancellationToken = cts.Token;
- // Configure batching options to batch multiple operations
- StorageOptions.Value.MinBatchSize = 5;
- StorageOptions.Value.MaxBatchSize = 50;
- StorageOptions.Value.BatchFlushInterval = TimeSpan.FromMilliseconds(100);
-
- var localAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 0);
- SetSiloStatus(localAddress, SiloStatus.Active);
- var manager = CreateManager(localAddress);
-
- var date = DateTime.UtcNow;
- var shard = await manager.CreateShardAsync(date, date.AddHours(1), _metadata, cancellationToken);
-
- // Schedule 10 jobs rapidly to trigger batching
- var tasks = new List();
- for (int i = 0; i < 10; i++)
- {
- tasks.Add(shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("type", $"target{i}"), JobName = $"job{i}", DueTime = date.AddMilliseconds(i * 10d), Metadata = null }, cancellationToken));
- }
-
- await Task.WhenAll(tasks);
-
- // Wait for batches to flush
- await Task.Delay(TimeSpan.FromMilliseconds(300), cancellationToken);
-
- // Verify batching occurred - should have fewer committed blocks than individual operations
- var azureShard = (AzureStorageJobShard)shard;
- Assert.True(azureShard.CommitedBlockCount < 10, $"Expected batching to reduce block count, but got {azureShard.CommitedBlockCount}");
-
- // Verify all jobs were persisted by marking silo as dead and reassigning
- SetSiloStatus(localAddress, SiloStatus.Dead);
- var newSiloAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 1);
- SetSiloStatus(newSiloAddress, SiloStatus.Active);
-
- var newManager = CreateManager(newSiloAddress);
- var shards = await newManager.AssignJobShardsAsync(DateTime.UtcNow.AddHours(1), maxNewClaims: int.MaxValue, cancellationToken);
- Assert.Single(shards);
-
- var consumedJobs = new List();
- await foreach (var jobCtx in shards[0].ConsumeDurableJobsAsync().WithCancellation(cancellationToken))
- {
- consumedJobs.Add(jobCtx.Job.Name);
- await shards[0].RemoveJobAsync(jobCtx.Job.Id, cancellationToken);
- }
-
- Assert.Equal(10, consumedJobs.Count);
- await newManager.UnregisterShardAsync(shards[0], cancellationToken);
- }
-
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShard_PartialBatchFlushesOnTimeout()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- var cancellationToken = cts.Token;
- // Configure batching to require 10 operations but with a short timeout
- StorageOptions.Value.MinBatchSize = 10;
- StorageOptions.Value.MaxBatchSize = 100;
- StorageOptions.Value.BatchFlushInterval = TimeSpan.FromMilliseconds(200);
-
- var localAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 0);
- SetSiloStatus(localAddress, SiloStatus.Active);
- var manager = CreateManager(localAddress);
-
- var date = DateTime.UtcNow;
- var shard = await manager.CreateShardAsync(date, date.AddHours(1), _metadata, cancellationToken);
-
- // Schedule only 3 jobs (less than MinBatchSize of 10)
- var tasks = new Task[3];
- tasks[0] = shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("type", "target1"), JobName = "job1", DueTime = date.AddSeconds(1), Metadata = null }, cancellationToken);
- tasks[1] = shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("type", "target2"), JobName = "job2", DueTime = date.AddSeconds(2), Metadata = null }, cancellationToken);
- tasks[2] = shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("type", "target3"), JobName = "job3", DueTime = date.AddSeconds(3), Metadata = null }, cancellationToken);
-
- await Task.WhenAll(tasks);
-
- // Verify that the partial batch was flushed - should have 1 committed block
- var azureShard = (AzureStorageJobShard)shard;
- Assert.Equal(1, azureShard.CommitedBlockCount);
-
- // Verify jobs were persisted despite not reaching MinBatchSize
- SetSiloStatus(localAddress, SiloStatus.Dead);
- var newSiloAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 1);
- SetSiloStatus(newSiloAddress, SiloStatus.Active);
-
- var newManager = CreateManager(newSiloAddress);
- var shards = await newManager.AssignJobShardsAsync(DateTime.UtcNow.AddHours(1), maxNewClaims: int.MaxValue, cancellationToken);
- Assert.Single(shards);
-
- var consumedJobs = new List();
- await foreach (var jobCtx in shards[0].ConsumeDurableJobsAsync().WithCancellation(cancellationToken))
- {
- consumedJobs.Add(jobCtx.Job.Name);
- await shards[0].RemoveJobAsync(jobCtx.Job.Id, cancellationToken);
- }
-
- Assert.Equal(3, consumedJobs.Count);
- await newManager.UnregisterShardAsync(shards[0], cancellationToken);
- }
-
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShard_MaxBatchSizeEnforced()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- var cancellationToken = cts.Token;
- // Configure batching with a small max batch size
- StorageOptions.Value.MinBatchSize = 1;
- StorageOptions.Value.MaxBatchSize = 20;
- StorageOptions.Value.BatchFlushInterval = TimeSpan.FromMilliseconds(50);
-
- var localAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 0);
- SetSiloStatus(localAddress, SiloStatus.Active);
- var manager = CreateManager(localAddress);
-
- var date = DateTime.UtcNow;
- var shard = await manager.CreateShardAsync(date, date.AddHours(1), _metadata, cancellationToken);
-
- // Schedule 50 jobs rapidly (exceeds MaxBatchSize of 20)
- var tasks = new List();
- for (int i = 0; i < 50; i++)
- {
- tasks.Add(shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("type", $"target{i}"), JobName = $"job{i}", DueTime = date.AddMilliseconds(i), Metadata = null }, cancellationToken));
- }
-
- await Task.WhenAll(tasks);
-
- // Wait for all batches to flush
- await Task.Delay(TimeSpan.FromMilliseconds(500), cancellationToken);
-
- // Verify multiple batches were created due to MaxBatchSize limit
- // With 50 jobs and MaxBatchSize=20, expect at least 3 blocks (50/20 = 2.5, rounded up)
- var azureShard = (AzureStorageJobShard)shard;
- Assert.True(azureShard.CommitedBlockCount >= 3, $"Expected at least 3 blocks for 50 jobs with MaxBatchSize=20, but got {azureShard.CommitedBlockCount}");
-
- // Verify all jobs were persisted (should be split into multiple batches)
- SetSiloStatus(localAddress, SiloStatus.Dead);
- var newSiloAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 1);
- SetSiloStatus(newSiloAddress, SiloStatus.Active);
-
- var newManager = CreateManager(newSiloAddress);
- var shards = await newManager.AssignJobShardsAsync(DateTime.UtcNow.AddHours(1), maxNewClaims: int.MaxValue, cancellationToken);
- Assert.Single(shards);
-
- var consumedJobs = new List();
- await foreach (var jobCtx in shards[0].ConsumeDurableJobsAsync().WithCancellation(cancellationToken))
- {
- consumedJobs.Add(jobCtx.Job.Name);
- await shards[0].RemoveJobAsync(jobCtx.Job.Id, cancellationToken);
- }
-
- Assert.Equal(50, consumedJobs.Count);
- await newManager.UnregisterShardAsync(shards[0], cancellationToken);
- }
-
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShard_MetadataOperationsBreakBatches()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- var cancellationToken = cts.Token;
- // Configure batching to require large batch
- StorageOptions.Value.MinBatchSize = 10;
- StorageOptions.Value.MaxBatchSize = 100;
- StorageOptions.Value.BatchFlushInterval = TimeSpan.FromSeconds(5);
-
- var localAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 0);
- SetSiloStatus(localAddress, SiloStatus.Active);
- var manager = CreateManager(localAddress);
-
- var date = DateTime.UtcNow;
- var shard = await manager.CreateShardAsync(date, date.AddHours(1), _metadata, cancellationToken);
-
- // Schedule 5 jobs (less than MinBatchSize)
- var tasks = new List<Task>();
- for (int i = 0; i < 5; i++)
- {
- tasks.Add(shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("type", $"target{i}"), JobName = $"job{i}", DueTime = date.AddMilliseconds(i), Metadata = null }, cancellationToken));
- }
-
- // Give operations time to queue
- await Task.Delay(50, cancellationToken);
-
- // Verify no blocks committed yet (batch still pending)
- var azureShard = (AzureStorageJobShard)shard;
- var blockCountBefore = azureShard.CommitedBlockCount;
-
- // Update metadata (should flush pending batch and process immediately)
- var newMetadata = new Dictionary<string, string>(shard.Metadata) { ["Updated"] = "true" };
- await azureShard.UpdateBlobMetadata(newMetadata, cancellationToken);
-
- Assert.All(tasks, t => Assert.True(t.IsCompletedSuccessfully, "Expected all job scheduling tasks to complete successfully"));
- Assert.True(azureShard.CommitedBlockCount > blockCountBefore, "Expected metadata update to flush pending batch");
-
- // Verify metadata was updated
- var props = await azureShard.BlobClient.GetPropertiesAsync(cancellationToken: cancellationToken);
- Assert.True(props.Value.Metadata.ContainsKey("Updated"));
- Assert.Equal("true", props.Value.Metadata["Updated"]);
-
- // Verify jobs were persisted (even though batch was incomplete)
- SetSiloStatus(localAddress, SiloStatus.Dead);
- var newSiloAddress = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5000), 1);
- SetSiloStatus(newSiloAddress, SiloStatus.Active);
-
- // Reconfigure batching to make test faster
- StorageOptions.Value.MinBatchSize = 1;
- StorageOptions.Value.MaxBatchSize = 1;
- StorageOptions.Value.BatchFlushInterval = TimeSpan.FromMilliseconds(100);
-
- var newManager = CreateManager(newSiloAddress);
- var shards = await newManager.AssignJobShardsAsync(DateTime.UtcNow.AddHours(1), maxNewClaims: int.MaxValue, cancellationToken);
- Assert.Single(shards);
-
- var consumedJobs = new List<string>();
- await foreach (var jobCtx in shards[0].ConsumeDurableJobsAsync().WithCancellation(cancellationToken))
- {
- consumedJobs.Add(jobCtx.Job.Name);
- await shards[0].RemoveJobAsync(jobCtx.Job.Id, cancellationToken);
- }
-
- Assert.Equal(5, consumedJobs.Count);
- await newManager.UnregisterShardAsync(shards[0], cancellationToken);
- }
-
- public class InMemoryClusterMembershipService : IClusterMembershipService
- {
- private readonly Dictionary<SiloAddress, ClusterMember> _silos = new();
- private int _version = 0;
-
- public ClusterMembershipSnapshot CurrentSnapshot =>
- new ClusterMembershipSnapshot(_silos.ToImmutableDictionary(), new MembershipVersion(_version));
-
- public IAsyncEnumerable<ClusterMembershipSnapshot> MembershipUpdates => throw new NotImplementedException();
-
- public void SetSiloStatus(SiloAddress address, SiloStatus status)
- {
- _silos[address] = new ClusterMember(address, status, address.ToParsableString());
- _version++;
- }
-
- public ValueTask Refresh(MembershipVersion minimumVersion = default, CancellationToken cancellationToken = default) =>
- ValueTask.CompletedTask;
-
- public Task<bool> TryKill(SiloAddress siloAddress) => throw new NotImplementedException();
- }
-}
diff --git a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardManagerTestFixture.cs b/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardManagerTestFixture.cs
deleted file mode 100644
index bfbae50ba93..00000000000
--- a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardManagerTestFixture.cs
+++ /dev/null
@@ -1,49 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Threading.Tasks;
-using Microsoft.Extensions.Logging.Abstractions;
-using Microsoft.Extensions.Options;
-using Orleans.Hosting;
-using Orleans.Runtime;
-using Orleans.DurableJobs;
-using Orleans.DurableJobs.AzureStorage;
-using Tester.AzureUtils;
-using Tester.DurableJobs;
-
-namespace Orleans.Tests.DurableJobs.AzureStorage;
-
-/// <summary>
-/// Azure Storage implementation of <see cref="IJobShardManagerTestFixture"/>.
-/// Provides the infrastructure needed to run shared job shard manager tests against Azure Storage.
-/// </summary>
-internal sealed class AzureStorageJobShardManagerTestFixture : IJobShardManagerTestFixture
-{
- private readonly IOptions<AzureStorageJobShardOptions> _storageOptions;
- private readonly IOptions<DurableJobsOptions> _durableJobsOptions;
-
- public AzureStorageJobShardManagerTestFixture()
- {
- _storageOptions = Options.Create(new AzureStorageJobShardOptions());
- _durableJobsOptions = Options.Create(new DurableJobsOptions());
- _storageOptions.Value.ConfigureTestDefaults();
- _storageOptions.Value.ContainerName = "test-container-" + Guid.NewGuid().ToString("N");
- }
-
- public JobShardManager CreateManager(ILocalSiloDetails localSiloDetails, IClusterMembershipService membershipService)
- {
- return new AzureStorageJobShardManager(
- localSiloDetails,
- _storageOptions,
- _durableJobsOptions,
- membershipService,
- NullLoggerFactory.Instance);
- }
-
- public async ValueTask DisposeAsync()
- {
- // Cleanup storage container
- var client = _storageOptions.Value.BlobServiceClient;
- var container = client.GetBlobContainerClient(_storageOptions.Value.ContainerName);
- await container.DeleteIfExistsAsync();
- }
-}
diff --git a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardManagerTests.cs b/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardManagerTests.cs
deleted file mode 100644
index 6ea17ecfc39..00000000000
--- a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageJobShardManagerTests.cs
+++ /dev/null
@@ -1,226 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Collections.Immutable;
-using System.Linq;
-using System.Net;
-using System.Text;
-using System.Threading;
-using System.Threading.Tasks;
-using AwesomeAssertions;
-using Microsoft.Extensions.Logging;
-using Microsoft.Extensions.Logging.Abstractions;
-using Microsoft.Extensions.Options;
-using Orleans.Internal;
-using Orleans.DurableJobs;
-using Orleans.DurableJobs.AzureStorage;
-using Orleans.Tests.DurableJobs.AzureStorage;
-using Tester.DurableJobs;
-using Xunit;
-using Xunit.Sdk;
-
-namespace Tester.AzureUtils.DurableJobs;
-
-/// <summary>
-/// Azure Storage-specific tests for job shard manager functionality.
-/// Common tests are delegated to <see cref="JobShardManagerTestsRunner"/> for reusability across providers.
-/// Provider-specific tests (e.g., batching) remain here.
-/// </summary>
-[TestCategory("DurableJobs")]
-public class AzureStorageJobShardManagerTests : AzureStorageBasicTests, IAsyncDisposable
-{
- private readonly AzureStorageJobShardManagerTestFixture _fixture;
- private readonly JobShardManagerTestsRunner _runner;
-
- internal IOptions<AzureStorageJobShardOptions> StorageOptions { get; }
-
- public AzureStorageJobShardManagerTests()
- {
- StorageOptions = Options.Create(new AzureStorageJobShardOptions());
- StorageOptions.Value.ConfigureTestDefaults();
- StorageOptions.Value.ContainerName = "test-container-" + Guid.NewGuid().ToString("N");
-
- // Create fixture and runner for common tests
- _fixture = new AzureStorageJobShardManagerTestFixture();
- _runner = new JobShardManagerTestsRunner(_fixture);
- }
-
- public async ValueTask DisposeAsync()
- {
- // Cleanup storage container
- var client = StorageOptions.Value.BlobServiceClient;
- var container = client.GetBlobContainerClient(StorageOptions.Value.ContainerName);
- await container.DeleteIfExistsAsync();
-
- // Cleanup fixture
- await _fixture.DisposeAsync();
- }
-
- #region Common Tests (Delegated to Runner)
-
- ///
- /// Tests basic shard creation and assignment workflow.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_Creation_Assignation()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.ShardCreationAndAssignment(cts.Token);
- }
-
- ///
- /// Tests reading and consuming jobs from a frozen shard after ownership transfer.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_ReadFrozenShard()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.ReadFrozenShard(cts.Token);
- }
-
- ///
- /// Tests consuming jobs from a live shard.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_LiveShard()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.LiveShard(cts.Token);
- }
-
- ///
- /// Tests job metadata persistence across ownership transfers.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_JobMetadata()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.JobMetadata(cts.Token);
- }
-
- ///
- /// Tests concurrent shard assignment to verify ownership conflict resolution.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_ConcurrentShardAssignment_OwnershipConflicts()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.ConcurrentShardAssignment_OwnershipConflicts(cts.Token);
- }
-
- ///
- /// Tests shard metadata preservation across ownership transfers.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_ShardMetadataMerge()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.ShardMetadataMerge(cts.Token);
- }
-
- #endregion
-
- ///
- /// Tests stopping shard processing and verifying jobs remain for reassignment.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_StopProcessingShard()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.StopProcessingShard(cts.Token);
- }
-
- ///
- /// Tests retrying a job with a new due time.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_RetryJobLater()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.RetryJobLater(cts.Token);
- }
-
- ///
- /// Tests job cancellation before and during processing.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_JobCancellation()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.JobCancellation(cts.Token);
- }
-
- ///
- /// Tests that multiple shard registrations with the same time range produce unique IDs.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_ShardRegistrationRetry_IdCollisions()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.ShardRegistrationRetry_IdCollisions(cts.Token);
- }
-
- ///
- /// Tests that unregistering a shard with remaining jobs preserves the shard for reassignment.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_UnregisterShard_WithJobsRemaining()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.UnregisterShard_WithJobsRemaining(cts.Token);
- }
-
- ///
- /// Tests that maxNewClaims limits the number of orphaned shards claimed.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_SlowStart_LimitsOrphanedShardClaims()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.SlowStart_LimitsOrphanedShardClaims(cts.Token);
- }
-
- ///
- /// Tests that maxNewClaims = 0 prevents claiming orphaned shards but returns owned shards.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_SlowStart_ZeroBudgetClaimsNothing()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.SlowStart_ZeroBudgetClaimsNothing(cts.Token);
- }
-
- ///
- /// Tests that maxNewClaims = int.MaxValue (unlimited) claims all orphaned shards.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_SlowStart_UnlimitedBudgetClaimsAll()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.SlowStart_UnlimitedBudgetClaimsAll(cts.Token);
- }
-
- ///
- /// Tests that budget exhaustion does not inflate the adopted count, avoiding false poison detection.
- /// This test is delegated to the runner for reuse across providers.
- ///
- [SkippableFact, TestCategory("Azure"), TestCategory("Functional")]
- public async Task AzureStorageJobShardManager_SlowStart_BudgetExhaustion_DoesNotInflateAdoptedCount()
- {
- using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(2));
- await _runner.SlowStart_BudgetExhaustion_DoesNotInflateAdoptedCount(cts.Token);
- }
-}
diff --git a/test/Extensions/Orleans.Azure.Tests/DurableJobs/NetstringJsonSerializerTests.cs b/test/Extensions/Orleans.Azure.Tests/DurableJobs/NetstringJsonSerializerTests.cs
deleted file mode 100644
index 64cf105c586..00000000000
--- a/test/Extensions/Orleans.Azure.Tests/DurableJobs/NetstringJsonSerializerTests.cs
+++ /dev/null
@@ -1,445 +0,0 @@
-using System;
-using System.Buffers;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using System.Text.Json;
-using System.Threading.Tasks;
-using AwesomeAssertions;
-using Orleans.Runtime;
-using Orleans.DurableJobs.AzureStorage;
-using Xunit;
-
-namespace Tester.AzureUtils.DurableJobs;
-
-[TestCategory("DurableJobs"), TestCategory("BVT")]
-public class NetstringJsonSerializerTests
-{
- private static byte[] EncodeToBytes(JobOperation operation)
- {
- var stream = new MemoryStream();
- NetstringJsonSerializer.Encode(operation, stream, JobOperationJsonContext.Default.JobOperation);
- return stream.ToArray();
- }
- [Fact]
- public void Encode_RemoveOperation_ProducesCorrectFormat()
- {
- var operation = JobOperation.CreateRemoveOperation("job123");
- var result = EncodeToBytes(operation);
- var resultString = Encoding.UTF8.GetString(result);
-
- resultString.Should().EndWith("\n");
- resultString.Should().Match("*:*\n");
- resultString.Should().Contain("\"type\":1");
- resultString.Should().Contain("\"id\":\"job123\"");
- }
-
- [Fact]
- public void Encode_AddOperation_ProducesCorrectFormat()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
- var operation = JobOperation.CreateAddOperation("job456", "TestJob", dueTime, grainId, null);
- var result = EncodeToBytes(operation);
- var resultString = Encoding.UTF8.GetString(result);
-
- resultString.Should().EndWith("\n");
- resultString.Should().Match("*:*\n");
- resultString.Should().Contain("\"id\":\"job456\"");
- resultString.Should().Contain("\"name\":\"TestJob\"");
- }
-
- [Fact]
- public void Encode_RetryOperation_ProducesCorrectFormat()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var operation = JobOperation.CreateRetryOperation("job789", dueTime);
- var result = EncodeToBytes(operation);
- var resultString = Encoding.UTF8.GetString(result);
-
- resultString.Should().EndWith("\n");
- resultString.Should().Match("*:*\n");
- resultString.Should().Contain("\"type\":2");
- resultString.Should().Contain("\"id\":\"job789\"");
- }
-
- [Fact]
- public void Encode_AddOperationWithMetadata_ProducesCorrectFormat()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
- var metadata = new Dictionary<string, string> { ["key1"] = "value1", ["key2"] = "value2" };
- var operation = JobOperation.CreateAddOperation("job999", "MetaJob", dueTime, grainId, metadata);
- var result = EncodeToBytes(operation);
- var resultString = Encoding.UTF8.GetString(result);
-
- resultString.Should().EndWith("\n");
- resultString.Should().Contain("\"metadata\"");
- resultString.Should().Contain("\"key1\":\"value1\"");
- resultString.Should().Contain("\"key2\":\"value2\"");
- }
-
- [Fact]
- public void Encode_VerifiesNetstringFormat()
- {
- var operation = JobOperation.CreateRemoveOperation("test");
- var result = EncodeToBytes(operation);
- var resultString = Encoding.UTF8.GetString(result);
-
- var parts = resultString.Split(':', 2);
- parts.Should().HaveCount(2);
-
- var lengthStr = parts[0];
- lengthStr.Should().HaveLength(6, "length prefix should be 6 hex digits");
- int.TryParse(lengthStr, System.Globalization.NumberStyles.HexNumber, null, out var length).Should().BeTrue("length should be valid hex");
- length.Should().BeGreaterThan(0);
-
- var dataAndNewline = parts[1];
- dataAndNewline.Should().EndWith("\n");
-
- var jsonData = dataAndNewline[..^1];
- var jsonBytes = Encoding.UTF8.GetBytes(jsonData);
- jsonBytes.Length.Should().Be(length, "JSON data length should match the hex length prefix");
- }
-
- [Fact]
- public async Task DecodeAsync_RemoveOperation_DecodesCorrectly()
- {
- var operation = JobOperation.CreateRemoveOperation("job123");
- var encoded = EncodeToBytes(operation);
- var stream = new MemoryStream(encoded);
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(1);
- results[0].Type.Should().Be(JobOperation.OperationType.Remove);
- results[0].Id.Should().Be("job123");
- }
-
- [Fact]
- public async Task DecodeAsync_AddOperation_DecodesCorrectly()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
- var operation = JobOperation.CreateAddOperation("job456", "TestJob", dueTime, grainId, null);
- var encoded = EncodeToBytes(operation);
- var stream = new MemoryStream(encoded);
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(1);
- results[0].Type.Should().Be(JobOperation.OperationType.Add);
- results[0].Id.Should().Be("job456");
- results[0].Name.Should().Be("TestJob");
- results[0].DueTime.Should().Be(dueTime);
- results[0].TargetGrainId.Should().Be(grainId);
- }
-
- [Fact]
- public async Task DecodeAsync_MultipleOperations_DecodesCorrectly()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
- var op1 = JobOperation.CreateAddOperation("job1", "Job1", dueTime, grainId, null);
- var op2 = JobOperation.CreateRemoveOperation("job2");
- var op3 = JobOperation.CreateRetryOperation("job3", dueTime.AddHours(1));
-
- var stream = new MemoryStream();
- await stream.WriteAsync(EncodeToBytes(op1));
- await stream.WriteAsync(EncodeToBytes(op2));
- await stream.WriteAsync(EncodeToBytes(op3));
- stream.Position = 0;
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(3);
- results[0].Type.Should().Be(JobOperation.OperationType.Add);
- results[0].Id.Should().Be("job1");
- results[1].Type.Should().Be(JobOperation.OperationType.Remove);
- results[1].Id.Should().Be("job2");
- results[2].Type.Should().Be(JobOperation.OperationType.Retry);
- results[2].Id.Should().Be("job3");
- }
-
- [Fact]
- public async Task DecodeAsync_AddOperationWithMetadata_DecodesCorrectly()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
- var metadata = new Dictionary<string, string> { ["key1"] = "value1", ["key2"] = "value2" };
- var operation = JobOperation.CreateAddOperation("job999", "MetaJob", dueTime, grainId, metadata);
- var encoded = EncodeToBytes(operation);
- var stream = new MemoryStream(encoded);
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(1);
- results[0].Metadata.Should().NotBeNull();
- results[0].Metadata.Should().ContainKey("key1").WhoseValue.Should().Be("value1");
- results[0].Metadata.Should().ContainKey("key2").WhoseValue.Should().Be("value2");
- }
-
- [Fact]
- public async Task DecodeAsync_EmptyStream_ReturnsEmpty()
- {
- var stream = new MemoryStream();
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().BeEmpty();
- }
-
- [Fact]
- public async Task DecodeAsync_InvalidLength_ThrowsInvalidDataException()
- {
- var encoded = "GGGGGG:{\"type\":1,\"id\":\"test\"}\n"; // Invalid hex
- var stream = new MemoryStream(Encoding.UTF8.GetBytes(encoded));
-
- var act = async () =>
- {
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Should throw before yielding any items
- }
- };
-
- await act.Should().ThrowAsync<InvalidDataException>()
- .WithMessage("Invalid netstring length: GGGGGG");
- }
-
- [Fact]
- public async Task DecodeAsync_ExcessiveLength_ThrowsInvalidDataException()
- {
- var encoded = "FFFFFF:{\"type\":1}\n"; // 16777215 bytes, exceeds MaxLength
- var stream = new MemoryStream(Encoding.UTF8.GetBytes(encoded));
-
- var act = async () =>
- {
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Should throw before yielding any items
- }
- };
-
- await act.Should().ThrowAsync<InvalidDataException>()
- .WithMessage("Netstring length out of valid range: *");
- }
-
- [Fact]
- public async Task DecodeAsync_MissingTrailingNewline_ThrowsInvalidDataException()
- {
- var json = "{\"type\":1,\"id\":\"test\"}";
- var jsonBytes = Encoding.UTF8.GetBytes(json);
- var encoded = $"{jsonBytes.Length:X6}:{json}x"; // Use 6-digit hex format
- var stream = new MemoryStream(Encoding.UTF8.GetBytes(encoded));
-
- var act = async () =>
- {
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Should throw after reading the data
- }
- };
-
- await act.Should().ThrowAsync<InvalidDataException>()
- .WithMessage("Expected newline at end of netstring, got byte value *");
- }
-
- [Fact]
- public async Task DecodeAsync_IncompleteData_ThrowsEndOfStreamException()
- {
- var encoded = "000064:{\"type\":1}"; // Claims 100 bytes but only provides 11
- var stream = new MemoryStream(Encoding.UTF8.GetBytes(encoded));
-
- var act = async () =>
- {
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Should throw before yielding any items
- }
- };
-
- await act.Should().ThrowAsync<EndOfStreamException>();
- }
-
- [Fact]
- public async Task DecodeAsync_WrongTrailingCharacter_ThrowsInvalidDataException()
- {
- var json = "{\"type\":1,\"id\":\"test\"}";
- var jsonBytes = Encoding.UTF8.GetBytes(json);
- var encoded = $"{jsonBytes.Length:X6}:{json}X"; // Use 6-digit hex format
- var stream = new MemoryStream(Encoding.UTF8.GetBytes(encoded));
-
- var act = async () =>
- {
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Should throw after reading the data
- }
- };
-
- await act.Should().ThrowAsync<InvalidDataException>()
- .WithMessage("Expected newline at end of netstring, got byte value *");
- }
-
- [Fact]
- public async Task DecodeAsync_InvalidJson_ThrowsJsonException()
- {
- var invalidJson = "{invalid json}";
- var jsonBytes = Encoding.UTF8.GetBytes(invalidJson);
- var encoded = $"{jsonBytes.Length:X6}:{invalidJson}\n"; // Use 6-digit hex format
- var stream = new MemoryStream(Encoding.UTF8.GetBytes(encoded));
-
- var act = async () =>
- {
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Should throw when deserializing
- }
- };
-
- await act.Should().ThrowAsync<JsonException>();
- }
-
- [Fact]
- public async Task EncodeAndDecode_RoundTrip_PreservesData()
- {
- var dueTime1 = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var dueTime2 = new DateTimeOffset(2025, 11, 1, 14, 30, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
- var metadata = new Dictionary<string, string> { ["env"] = "prod", ["region"] = "us-east" };
-
- var testOperations = new[]
- {
- JobOperation.CreateRemoveOperation("remove-job"),
- JobOperation.CreateAddOperation("add-job", "MyJob", dueTime1, grainId, null),
- JobOperation.CreateRetryOperation("retry-job", dueTime2),
- JobOperation.CreateAddOperation("meta-job", "MetaJob", dueTime1, grainId, metadata)
- };
-
- foreach (var operation in testOperations)
- {
- var encoded = EncodeToBytes(operation);
- var stream = new MemoryStream(encoded);
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(1);
- results[0].Type.Should().Be(operation.Type);
- results[0].Id.Should().Be(operation.Id);
- results[0].Name.Should().Be(operation.Name);
- results[0].DueTime.Should().Be(operation.DueTime);
- results[0].TargetGrainId.Should().Be(operation.TargetGrainId);
-
- if (operation.Metadata is not null)
- {
- results[0].Metadata.Should().NotBeNull();
- results[0].Metadata.Should().BeEquivalentTo(operation.Metadata);
- }
- }
- }
-
- [Fact]
- public async Task EncodeAndDecode_MultipleOperations_RoundTrip()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
-
- var testOperations = new[]
- {
- JobOperation.CreateAddOperation("job1", "First", dueTime, grainId, null),
- JobOperation.CreateRemoveOperation("job2"),
- JobOperation.CreateRetryOperation("job3", dueTime.AddHours(1)),
- JobOperation.CreateAddOperation("job4", "Fourth", dueTime.AddDays(1), grainId, null)
- };
-
- var memoryStream = new MemoryStream();
- foreach (var operation in testOperations)
- {
- var encoded = EncodeToBytes(operation);
- await memoryStream.WriteAsync(encoded);
- }
-
- memoryStream.Position = 0;
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(memoryStream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(4);
- for (var i = 0; i < testOperations.Length; i++)
- {
- results[i].Type.Should().Be(testOperations[i].Type);
- results[i].Id.Should().Be(testOperations[i].Id);
- }
- }
-
- [Fact]
- public async Task DecodeAsync_StreamPosition_IsPreserved()
- {
- var operation = JobOperation.CreateRemoveOperation("test");
- var encoded = EncodeToBytes(operation);
- var stream = new MemoryStream(encoded);
-
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- // Stream should be at the end after reading
- }
-
- stream.Position.Should().Be(stream.Length);
- }
-
- [Fact]
- public async Task EncodeAndDecode_LargeMetadata_HandlesCorrectly()
- {
- var dueTime = new DateTimeOffset(2025, 10, 31, 12, 0, 0, TimeSpan.Zero);
- var grainId = GrainId.Create("test", "grain1");
-
- var largeMetadata = new Dictionary<string, string>();
- for (var i = 0; i < 100; i++)
- {
- largeMetadata[$"key{i}"] = new string('x', 1000);
- }
-
- var operation = JobOperation.CreateAddOperation("large-job", "LargeMetaJob", dueTime, grainId, largeMetadata);
- var encoded = EncodeToBytes(operation);
- var stream = new MemoryStream(encoded);
-
- var results = new List<JobOperation>();
- await foreach (var item in NetstringJsonSerializer.DecodeAsync(stream, JobOperationJsonContext.Default.JobOperation, CancellationToken.None))
- {
- results.Add(item);
- }
-
- results.Should().HaveCount(1);
- results[0].Metadata.Should().NotBeNull();
- results[0].Metadata.Should().HaveCount(100);
- }
-}
diff --git a/test/Extensions/Orleans.Azure.Tests/Orleans.Azure.Tests.csproj b/test/Extensions/Orleans.Azure.Tests/Orleans.Azure.Tests.csproj
index b654de69ca5..2e704b4cece 100644
--- a/test/Extensions/Orleans.Azure.Tests/Orleans.Azure.Tests.csproj
+++ b/test/Extensions/Orleans.Azure.Tests/Orleans.Azure.Tests.csproj
@@ -21,9 +21,7 @@
-
-
diff --git a/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/DurableJobs/AzureBlobJournaledJobShardManagerTests.cs b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/DurableJobs/AzureBlobJournaledJobShardManagerTests.cs
new file mode 100644
index 00000000000..15bac6e3691
--- /dev/null
+++ b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/DurableJobs/AzureBlobJournaledJobShardManagerTests.cs
@@ -0,0 +1,113 @@
+#nullable enable
+
+using Azure.Storage.Blobs;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using Orleans.DurableJobs.Tests;
+using Orleans.Hosting;
+using Orleans.Runtime;
+using Tester;
+using TestExtensions;
+using Xunit;
+
+namespace Tester.AzureUtils.DurableJobs;
+
+/// <summary>
+/// Runs the test cases inherited from JobShardManagerTestsRunner against the fixture declared below.
+/// </summary>
+[TestCategory("Azure"), TestCategory("DurableJobs")]
+public sealed class AzureBlobJournaledJobShardManagerTests(AzureBlobJournaledJobShardManagerTestFixture fixture)
+    : JobShardManagerTestsRunner(fixture), IClassFixture;
+
+/// <summary>
+/// Builds a standalone service provider configured through UseAzureBlobDurableJobs and starts its
+/// registered lifecycle participants. Every scope gets its own uniquely named blob container.
+/// </summary>
+public sealed class AzureBlobJournaledJobShardManagerTestFixture : IJobShardManagerTestFixture
+{
+    /// <summary>
+    /// Creates a started scope whose container is named "durablejobs-shard-tests-" followed by a new GUID.
+    /// </summary>
+    /// <returns>A scope that stops the lifecycle and deletes the container when it is disposed.</returns>
+    public async Task CreateScopeAsync()
+    {
+        // NOTE(review): presumably skips or fails the test when Azure Storage is not configured; confirm in TestUtils.
+        TestUtils.CheckForAzureStorage();
+
+        var containerName = "durablejobs-shard-tests-" + Guid.NewGuid().ToString("N");
+        var services = new ServiceCollection();
+        services.AddLogging();
+        services.AddSingleton(TimeProvider.System);
+        services.UseAzureBlobDurableJobs(options =>
+        {
+            options.ConfigureTestDefaults();
+            options.ContainerName = containerName;
+        });
+
+        var serviceProvider = services.BuildServiceProvider();
+        try
+        {
+            var lifecycle = new SiloLifecycleSubject(serviceProvider.GetRequiredService>());
+            foreach (var participant in serviceProvider.GetServices>())
+            {
+                participant.Participate(lifecycle);
+            }
+
+            using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
+            await lifecycle.OnStart(cts.Token);
+            return new AzureBlobJournaledJobShardManagerTestScope(serviceProvider, lifecycle, CreateContainerClient(containerName));
+        }
+        catch
+        {
+            // No scope reaches the caller on this path, so nothing else would dispose the provider.
+            await serviceProvider.DisposeAsync();
+            throw;
+        }
+    }
+
+    /// <summary>
+    /// Creates a client for the named container: URI plus token credential when
+    /// TestDefaultConfiguration.UseAadAuthentication is set, connection string otherwise.
+    /// </summary>
+    private static BlobContainerClient CreateContainerClient(string containerName)
+    {
+        return TestDefaultConfiguration.UseAadAuthentication
+            ? new BlobContainerClient(new Uri(TestDefaultConfiguration.DataBlobUri, containerName), TestDefaultConfiguration.TokenCredential)
+            : new BlobContainerClient(TestDefaultConfiguration.DataConnectionString, containerName);
+    }
+
+    /// <summary>
+    /// Scope that holds the started lifecycle and the client for this scope's blob container.
+    /// </summary>
+    private sealed class AzureBlobJournaledJobShardManagerTestScope(
+        ServiceProvider services,
+        SiloLifecycleSubject lifecycle,
+        BlobContainerClient container) : JournaledJobShardManagerTestScope(services)
+    {
+        /// <summary>
+        /// Stops the lifecycle, disposes the base scope, then deletes the container. A failure in an
+        /// earlier step does not skip the later ones, so a failed stop cannot leave the container behind.
+        /// </summary>
+        public override async ValueTask DisposeAsync()
+        {
+            try
+            {
+                using var stopCts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
+                await lifecycle.OnStop(stopCts.Token);
+            }
+            finally
+            {
+                try
+                {
+                    await base.DisposeAsync();
+                }
+                finally
+                {
+                    // Separate timeout so a slow stop cannot use up the time allowed for the delete.
+                    using var deleteCts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
+                    await container.DeleteIfExistsAsync(cancellationToken: deleteCts.Token);
+                }
+            }
+        }
+    }
+}
diff --git a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageBlobDurableJobsTests.cs b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/DurableJobs/AzureStorageBlobDurableJobsTests.cs
similarity index 87%
rename from test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageBlobDurableJobsTests.cs
rename to test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/DurableJobs/AzureStorageBlobDurableJobsTests.cs
index 4dd964c8f0d..60178f563e2 100644
--- a/test/Extensions/Orleans.Azure.Tests/DurableJobs/AzureStorageBlobDurableJobsTests.cs
+++ b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/DurableJobs/AzureStorageBlobDurableJobsTests.cs
@@ -1,8 +1,7 @@
-using System;
-using System.Threading;
-using System.Threading.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Orleans.Configuration;
+using Orleans.Hosting;
+using Orleans.Journaling;
using Orleans.TestingHost;
using Tester;
using Tester.DurableJobs;
@@ -129,3 +128,27 @@ public async Task JobRetry()
await _runner.JobRetry(cts.Token);
}
}
+
+/// <summary>
+/// Test-only helpers that point AzureBlobJournalStorageOptions at the storage described by TestDefaultConfiguration.
+/// </summary>
+internal static class AzureBlobDurableJobsTestConfiguration
+{
+    /// <summary>
+    /// Configures the blob service client from TestDefaultConfiguration: blob URI plus token credential
+    /// when UseAadAuthentication is set, the data connection string otherwise.
+    /// </summary>
+    /// <param name="options">The options to configure.</param>
+    /// <returns>The same <paramref name="options"/> instance, so calls can be chained.</returns>
+    public static AzureBlobJournalStorageOptions ConfigureTestDefaults(this AzureBlobJournalStorageOptions options)
+    {
+        if (!TestDefaultConfiguration.UseAadAuthentication)
+        {
+            options.ConfigureBlobServiceClient(TestDefaultConfiguration.DataConnectionString);
+            return options;
+        }
+
+        options.ConfigureBlobServiceClient(TestDefaultConfiguration.DataBlobUri, TestDefaultConfiguration.TokenCredential);
+        return options;
+    }
+}
diff --git a/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/Orleans.DurableJobs.AzureStorage.Tests.csproj b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/Orleans.DurableJobs.AzureStorage.Tests.csproj
new file mode 100644
index 00000000000..a52f7618b79
--- /dev/null
+++ b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/Orleans.DurableJobs.AzureStorage.Tests.csproj
@@ -0,0 +1,32 @@
+
+
+ true
+ TRACE;TESTER_AZUREUTILS;ORLEANS_PERSISTENCE
+ Exe
+ $(TestTargetFrameworks)
+ false
+ true
+ en-US
+
+
+
+ $(NoWarn);ORLEANSEXP005
+
+
+
+
+
+
+ all
+ runtime; build; native; contentfiles; analyzers; buildtransitive
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/Program.cs b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/Program.cs
new file mode 100644
index 00000000000..f979d5fb50e
--- /dev/null
+++ b/test/Extensions/Orleans.DurableJobs.AzureStorage.Tests/Program.cs
@@ -0,0 +1,8 @@
+using Orleans.TestingHost;
+
+namespace Tester.AzureUtils.DurableJobs;
+/// <summary>Hosts Main, which passes the command-line arguments unchanged to StandaloneSiloHost.Main and awaits it.</summary>
+public static class Program
+{
+ public static async Task Main(string[] args) => await StandaloneSiloHost.Main(args);
+}
diff --git a/test/Orleans.Core.Tests/DurableJobs/InMemoryJobQueueTests.cs b/test/Orleans.Core.Tests/DurableJobs/InMemoryJobQueueTests.cs
index c9826818cfc..0026c4ef0dc 100644
--- a/test/Orleans.Core.Tests/DurableJobs/InMemoryJobQueueTests.cs
+++ b/test/Orleans.Core.Tests/DurableJobs/InMemoryJobQueueTests.cs
@@ -3,9 +3,10 @@
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
+using Microsoft.Extensions.Time.Testing;
+using NSubstitute;
using Orleans.DurableJobs;
using Orleans.Runtime;
-using NSubstitute;
using Xunit;
namespace NonSilo.Tests.DurableJobs;
@@ -109,20 +110,23 @@ public async Task GetAsyncEnumerator_WithInitialDequeueCount_IncrementsCorrectly
[Fact]
public async Task GetAsyncEnumerator_WaitsForDueTime()
{
- var queue = new InMemoryJobQueue();
- var futureTime = DateTimeOffset.UtcNow.AddSeconds(2);
+ var timeProvider = new FakeTimeProvider(new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero));
+ var queue = new InMemoryJobQueue(timeProvider);
+ var futureTime = timeProvider.GetUtcNow().AddSeconds(1);
var job = CreateJob("job1", futureTime);
queue.Enqueue(job, 0);
queue.MarkAsComplete();
- var startTime = DateTimeOffset.UtcNow;
- await foreach (var context in queue.WithCancellation(CancellationToken.None))
- {
- var elapsed = DateTimeOffset.UtcNow - startTime;
- Assert.True(elapsed.TotalSeconds >= 1.5, $"Job was dequeued too early. Elapsed: {elapsed.TotalSeconds}s");
- break;
- }
+ await using var enumerator = queue.GetAsyncEnumerator(CancellationToken.None);
+ var moveNextTask = enumerator.MoveNextAsync().AsTask();
+ // The fake clock has not moved and the job is due one second ahead of it, so the move must still be pending.
+ Assert.False(moveNextTask.IsCompleted);
+ // Move the fake clock three seconds forward, which is past the job's due time.
+ timeProvider.Advance(TimeSpan.FromSeconds(3));
+ // The five-second WaitAsync limit makes a job that is never released fail the test rather than hang it.
+ Assert.True(await moveNextTask.WaitAsync(TimeSpan.FromSeconds(5)));
+ Assert.Equal(job.Id, enumerator.Current.Job.Id);
}
[Fact]
diff --git a/test/Orleans.Core.Tests/DurableJobs/InMemoryJobShardManagerTests.cs b/test/Orleans.Core.Tests/DurableJobs/InMemoryJobShardManagerTests.cs
deleted file mode 100644
index bda6a42ecde..00000000000
--- a/test/Orleans.Core.Tests/DurableJobs/InMemoryJobShardManagerTests.cs
+++ /dev/null
@@ -1,282 +0,0 @@
-#nullable enable
-
-using System.Collections.Immutable;
-using System.Net;
-using Microsoft.Extensions.DependencyInjection;
-using Orleans.DurableJobs;
-using Orleans.Hosting;
-using Orleans.Runtime;
-using NSubstitute;
-using Xunit;
-
-namespace NonSilo.Tests.DurableJobs;
-
-[TestCategory("DurableJobs")]
-public class InMemoryJobShardManagerTests : IAsyncLifetime
-{
- private static readonly SiloAddress Silo1 = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5001), 1);
- private static readonly SiloAddress Silo2 = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5002), 2);
- private static readonly SiloAddress Silo3 = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5003), 3);
- private static readonly SiloAddress Silo4 = SiloAddress.New(new IPEndPoint(IPAddress.Loopback, 5004), 4);
-
- public Task InitializeAsync() => InMemoryJobShardManager.ClearAllShardsAsync();
-
- public Task DisposeAsync() => InMemoryJobShardManager.ClearAllShardsAsync();
-
- [Fact]
- public async Task CreateShardAsync_CreatesShardOwnedBySilo()
- {
- var manager = new InMemoryJobShardManager(Silo1);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var shard = await manager.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- Assert.NotNull(shard);
- Assert.Equal(minDueTime, shard.StartTime);
- Assert.Equal(maxDueTime, shard.EndTime);
- }
-
- [Fact]
- public async Task AssignJobShardsAsync_ReturnsOwnedShards()
- {
- var manager = new InMemoryJobShardManager(Silo1);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var createdShard = await manager.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
- var assignedShards = await manager.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
-
- Assert.Single(assignedShards);
- Assert.Equal(createdShard.Id, assignedShards[0].Id);
- }
-
- [Fact]
- public async Task AssignJobShardsAsync_OrphanedShard_IsAssignedWithoutIncrementingAdoptedCount()
- {
- // Silo1 creates a shard and gracefully releases it
- var manager1 = new InMemoryJobShardManager(Silo1);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var shard = await manager1.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Schedule a job so the shard isn't deleted on unregister
- await shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("test", "grain1"), JobName = "TestJob", DueTime = minDueTime.AddMinutes(30), Metadata = null }, CancellationToken.None);
-
- // Gracefully unregister (sets owner to null)
- await manager1.UnregisterShardAsync(shard, CancellationToken.None);
-
- // Silo2 picks up the orphaned shard
- var manager2 = new InMemoryJobShardManager(Silo2);
- var assignedShards = await manager2.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
-
- Assert.Single(assignedShards);
- Assert.Equal(shard.Id, assignedShards[0].Id);
-
- var ownershipInfo = await InMemoryJobShardManager.GetOwnershipInfoAsync(shard.Id);
- Assert.True(ownershipInfo.HasValue);
- Assert.Equal(Silo2.ToString(), ownershipInfo.Value.Owner);
- Assert.Equal(0, ownershipInfo.Value.AdoptedCount);
- }
-
- [Fact]
- public async Task AssignJobShardsAsync_AdoptedFromDeadSilo_IncrementsAdoptedCount()
- {
- // Setup membership service that reports Silo1 as dead
- var membershipService = CreateMembershipService(deadSilos: [Silo1]);
-
- // Silo1 creates a shard (simulating it was created before death)
- var manager1 = new InMemoryJobShardManager(Silo1, membershipService);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var shard = await manager1.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Silo2 adopts the shard from dead Silo1
- var manager2 = new InMemoryJobShardManager(Silo2, membershipService, maxAdoptedCount: 3);
- var assignedShards = await manager2.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
-
- // Shard should be assigned (adopted count = 1, under threshold)
- Assert.Single(assignedShards);
- Assert.Equal(shard.Id, assignedShards[0].Id);
-
- var ownershipInfo = await InMemoryJobShardManager.GetOwnershipInfoAsync(shard.Id);
- Assert.True(ownershipInfo.HasValue);
- Assert.Equal(Silo2.ToString(), ownershipInfo.Value.Owner);
- Assert.Equal(1, ownershipInfo.Value.AdoptedCount);
- }
-
- [Fact]
- public async Task AssignJobShardsAsync_PoisonedShard_IsNotAssigned()
- {
- // Setup membership service
- var membershipService = Substitute.For();
- var snapshot = CreateMembershipSnapshot(deadSilos: [Silo1, Silo2, Silo3]);
- membershipService.CurrentSnapshot.Returns(snapshot);
-
- // Silo1 creates a shard
- var manager1 = new InMemoryJobShardManager(Silo1, membershipService, maxAdoptedCount: 2);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- await manager1.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Silo2 adopts from dead Silo1 (adopted count = 1)
- var manager2 = new InMemoryJobShardManager(Silo2, membershipService, maxAdoptedCount: 2);
- var shards2 = await manager2.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
- Assert.Single(shards2);
-
- // Silo3 adopts from dead Silo2 (adopted count = 2)
- var manager3 = new InMemoryJobShardManager(Silo3, membershipService, maxAdoptedCount: 2);
- var shards3 = await manager3.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
- Assert.Single(shards3);
-
- // Silo4 tries to adopt from dead Silo3 (adopted count would be 3, exceeds max of 2)
- var manager4 = new InMemoryJobShardManager(Silo4, membershipService, maxAdoptedCount: 2);
- var shards4 = await manager4.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
-
- // Shard is poisoned and should not be assigned
- Assert.Empty(shards4);
- }
-
- [Fact]
- public async Task AssignJobShardsAsync_MaxAdoptedCountOfZero_NeverAssignsAdoptedShards()
- {
- // Setup membership service that reports Silo1 as dead
- var membershipService = CreateMembershipService(deadSilos: [Silo1]);
-
- // Silo1 creates a shard
- var manager1 = new InMemoryJobShardManager(Silo1, membershipService, maxAdoptedCount: 0);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- await manager1.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Silo2 tries to adopt from dead Silo1 with maxAdoptedCount=0
- var manager2 = new InMemoryJobShardManager(Silo2, membershipService, maxAdoptedCount: 0);
- var assignedShards = await manager2.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
-
- // Shard should not be assigned (adopted count would be 1, exceeds max of 0)
- Assert.Empty(assignedShards);
- }
-
- [Fact]
- public async Task UseInMemoryDurableJobs_ConfiguredMaxAdoptedCount_IsApplied()
- {
- var membershipService = CreateMembershipService(deadSilos: [Silo2]);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var ownerManager = new InMemoryJobShardManager(Silo2, membershipService, maxAdoptedCount: 3);
- await ownerManager.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- var localSiloDetails = Substitute.For();
- localSiloDetails.SiloAddress.Returns(Silo1);
-
- var services = new ServiceCollection();
- services.AddSingleton(localSiloDetails);
- services.AddSingleton(membershipService);
- services.Configure(options => options.MaxAdoptedCount = 0);
- services.UseInMemoryDurableJobs();
-
- using var serviceProvider = services.BuildServiceProvider();
- var manager = serviceProvider.GetRequiredService();
-
- var assignedShards = await manager.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
- Assert.Empty(assignedShards);
- }
-
- [Fact]
- public async Task AssignJobShardsAsync_ShardFromActiveSilo_IsNotAssigned()
- {
- // Setup membership service that reports Silo1 as active
- var membershipService = CreateMembershipService(activeSilos: [Silo1]);
-
- // Silo1 creates a shard
- var manager1 = new InMemoryJobShardManager(Silo1, membershipService);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- await manager1.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Silo2 tries to get shards - should not get Silo1's shard since Silo1 is active
- var manager2 = new InMemoryJobShardManager(Silo2, membershipService);
- var assignedShards = await manager2.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
-
- Assert.Empty(assignedShards);
- }
-
- [Fact]
- public async Task UnregisterShardAsync_WithNoJobsRemaining_RemovesShard()
- {
- var manager = new InMemoryJobShardManager(Silo1);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var shard = await manager.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Unregister with no jobs
- await manager.UnregisterShardAsync(shard, CancellationToken.None);
-
- // Shard should be removed, not reassignable
- var assignedShards = await manager.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
- Assert.Empty(assignedShards);
- }
-
- [Fact]
- public async Task UnregisterShardAsync_WithJobsRemaining_MarksShardAsOrphaned()
- {
- var manager1 = new InMemoryJobShardManager(Silo1);
- var minDueTime = DateTimeOffset.UtcNow;
- var maxDueTime = minDueTime.AddHours(1);
-
- var shard = await manager1.CreateShardAsync(minDueTime, maxDueTime, new Dictionary(), CancellationToken.None);
-
- // Add a job
- await shard.TryScheduleJobAsync(new ScheduleJobRequest { Target = GrainId.Create("test", "grain1"), JobName = "TestJob", DueTime = minDueTime.AddMinutes(30), Metadata = null }, CancellationToken.None);
-
- // Unregister with jobs remaining
- await manager1.UnregisterShardAsync(shard, CancellationToken.None);
-
- // Shard should be orphaned and available for another silo
- var manager2 = new InMemoryJobShardManager(Silo2);
- var assignedShards = await manager2.AssignJobShardsAsync(maxDueTime, int.MaxValue, CancellationToken.None);
- Assert.Single(assignedShards);
- }
-
- private static IClusterMembershipService CreateMembershipService(
- SiloAddress[]? activeSilos = null,
- SiloAddress[]? deadSilos = null)
- {
- var membershipService = Substitute.For