-
Notifications
You must be signed in to change notification settings - Fork 5.1k
Add transmission state manager with exponential back off #34926
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
vishweshbankwar
merged 13 commits into
main
from
vibankwa/update-transmit-from-storage-mechanism
Mar 16, 2023
Merged
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
806760d
draft
0493a2b
Merge branch 'main' into vibankwa/update-transmit-from-storage-mechanism
1ce397c
Transmission state mgr for exponential backoff
14a7b40
fix comment
7763072
fix comment
31ba304
refactor
b708e29
address PR comments
d929740
fix typo
bf3055c
add summary for back off time interval calculation
9a3878f
fix comment
70a8bd1
add ctr for tests and unit tests
53da284
Merge branch 'main' into vibankwa/update-transmit-from-storage-mechanism
5e5072e
add timer dispose
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
sdk/monitor/Azure.Monitor.OpenTelemetry.Exporter/src/Internals/TransmissionState.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| namespace Azure.Monitor.OpenTelemetry.Exporter.Internals | ||
| { | ||
/// <summary>
/// State of the transmission "circuit" between the exporter and the backend.
/// The names follow circuit-breaker terminology: an open circuit carries
/// nothing, a closed circuit lets data flow.
/// </summary>
internal enum TransmissionState
{
    /// <summary>
    /// Transmission is disabled (circuit open).
    /// </summary>
    Open = 0,

    /// <summary>
    /// Transmission is enabled (circuit closed).
    /// </summary>
    Closed = 1,
}
| } | ||
165 changes: 165 additions & 0 deletions
165
sdk/monitor/Azure.Monitor.OpenTelemetry.Exporter/src/Internals/TransmissionStateManager.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| using System; | ||
| using System.Threading; | ||
|
|
||
| namespace Azure.Monitor.OpenTelemetry.Exporter.Internals | ||
| { | ||
/// <summary>
/// Tracks whether data may currently be transmitted to the backend and,
/// after a failed request, halts transmission for a back-off interval.
/// The interval comes from the response's retry-after value when
/// <c>HttpPipelineHelper.TryGetRetryIntervalTimespan</c> yields one, otherwise
/// from an exponential calculation based on the number of consecutive errors.
/// A one-shot timer re-enables transmission when the interval elapses.
/// </summary>
internal class TransmissionStateManager : IDisposable
{
    /// <summary>
    /// Upper bound, in seconds, of a calculated back-off interval.
    /// </summary>
    private const int MaxDelayInSeconds = 3600;

    /// <summary>
    /// Lower bound, in seconds, of a calculated back-off interval.
    /// </summary>
    private const int MinDelayInSeconds = 10;

    private readonly Random _random = new();

    /// <summary>
    /// Minimum time interval between failures to increment consecutive error count.
    /// </summary>
    private readonly TimeSpan _minIntervalToUpdateConsecutiveErrors = TimeSpan.FromSeconds(20);

    /// <summary>
    /// Time threshold after which consecutive error count can be incremented.
    /// </summary>
    private DateTimeOffset _nextMinTimeToUpdateConsecutiveErrors = DateTimeOffset.MinValue;

    /// <summary>
    /// Timer whose <c>Elapsed</c> event marks the end of the back-off interval.
    /// </summary>
    private readonly System.Timers.Timer _backOffIntervalTimer;

    /// <summary>
    /// 0 when no caller is inside <see cref="EnableBackOff"/>, 1 while one is.
    /// Used with <see cref="Interlocked"/> as a non-blocking entry guard.
    /// </summary>
    private int _syncBackOffIntervalCalculation;

    private int _consecutiveErrors;
    private bool _disposed;

    /// <summary>
    /// Current transmission state. <see cref="TransmissionState.Open"/> means
    /// transmission is halted; <see cref="TransmissionState.Closed"/> means it is allowed.
    /// </summary>
    internal TransmissionState State { get; private set; }

    /// <summary>
    /// Creates a manager with transmission enabled and a one-shot timer that
    /// re-enables transmission when a back-off interval elapses.
    /// </summary>
    internal TransmissionStateManager()
    {
        _backOffIntervalTimer = new();
        _backOffIntervalTimer.Elapsed += ResetTransmission;
        _backOffIntervalTimer.AutoReset = false;
        State = TransmissionState.Closed;
    }

    /// <summary>
    /// For test purposes. Unlike the parameterless constructor, this does not
    /// subscribe <see cref="ResetTransmission"/> to the timer or change its
    /// <c>AutoReset</c> setting; the supplied timer is used exactly as given.
    /// </summary>
    /// <param name="random">Random source used for the back-off jitter.</param>
    /// <param name="minIntervalToUpdateConsecutiveErrors">Minimum time interval between failures to increment consecutive error count.</param>
    /// <param name="nextMinTimeToUpdateConsecutiveErrors">Initial time threshold after which consecutive error count can be incremented.</param>
    /// <param name="backOffIntervalTimer">Timer started for each back-off interval; disposed together with this instance.</param>
    /// <param name="state">Initial transmission state.</param>
    internal TransmissionStateManager(
        Random random,
        TimeSpan minIntervalToUpdateConsecutiveErrors,
        DateTimeOffset nextMinTimeToUpdateConsecutiveErrors,
        System.Timers.Timer backOffIntervalTimer,
        TransmissionState state)
    {
        _random = random;
        _minIntervalToUpdateConsecutiveErrors = minIntervalToUpdateConsecutiveErrors;
        _nextMinTimeToUpdateConsecutiveErrors = nextMinTimeToUpdateConsecutiveErrors;
        _backOffIntervalTimer = backOffIntervalTimer;
        State = state;
    }

    /// <summary>
    /// Stops transmitting data to backend.
    /// </summary>
    private void OpenTransmission()
    {
        State = TransmissionState.Open;
    }

    /// <summary>
    /// Enable transmitting data to backend.
    /// To be called for each successful request or after back-off interval expiration.
    /// </summary>
    internal void CloseTransmission()
    {
        State = TransmissionState.Closed;
    }

    /// <summary>
    /// Resets consecutive error count.
    /// To be called for each successful request.
    /// </summary>
    internal void ResetConsecutiveErrors()
    {
        Interlocked.Exchange(ref _consecutiveErrors, 0);
    }

    /// <summary>
    /// Timer callback: re-enables transmission once the back-off interval has elapsed.
    /// </summary>
    /// <param name="source">Event sender (unused).</param>
    /// <param name="e">Event data (unused).</param>
    internal void ResetTransmission(object source, System.Timers.ElapsedEventArgs e)
    {
        CloseTransmission();
    }

    /// <summary>
    /// Records a failed request and, when a positive back-off interval applies,
    /// halts transmission and starts the back-off timer.
    /// If another caller is already inside this method the call returns immediately
    /// without doing anything.
    /// </summary>
    /// <param name="response">Response of the failed request, if any; passed to the retry-after lookup.</param>
    internal void EnableBackOff(Response? response)
    {
        // Non-blocking guard: only one caller at a time runs the calculation.
        if (Interlocked.Exchange(ref _syncBackOffIntervalCalculation, 1) != 0)
        {
            return;
        }

        try
        {
            // Do not increase number of errors more often than minimum interval.
            // since we can have 4 parallel transmissions (logs, metrics, traces and offline storage transmission) and all of them most likely would fail if we have intermittent error.
            if (DateTimeOffset.UtcNow > _nextMinTimeToUpdateConsecutiveErrors)
            {
                Interlocked.Increment(ref _consecutiveErrors);
                _nextMinTimeToUpdateConsecutiveErrors = DateTimeOffset.UtcNow + _minIntervalToUpdateConsecutiveErrors;

                // If backend responded with a retryAfter header we will use it
                // else we will calculate by increasing time interval exponentially.
                var backOffTimeInterval = HttpPipelineHelper.TryGetRetryIntervalTimespan(response, out var retryAfterInterval)
                    ? retryAfterInterval
                    : GetBackOffTimeInterval();

                if (backOffTimeInterval > TimeSpan.Zero)
                {
                    OpenTransmission();

                    // System.Timers.Timer cannot be started with an interval above
                    // int.MaxValue milliseconds, so cap very large retry-after values.
                    _backOffIntervalTimer.Interval = Math.Min(backOffTimeInterval.TotalMilliseconds, int.MaxValue);

                    _backOffIntervalTimer.Start();
                }
            }
        }
        finally
        {
            // Always release the guard; otherwise a single exception above would
            // make every later call return early and back-off would never engage again.
            Interlocked.Exchange(ref _syncBackOffIntervalCalculation, 0);
        }
    }

    /// <summary>
    /// Calculates the time interval for which the transmission should be halted.
    /// Number of consecutive errors are taken in to account to increase the time.
    /// Random variation is introduced in order to avoid collision.
    /// </summary>
    /// <remarks>
    /// With n consecutive errors the slot is (2^n - 1) / 2. A random whole number
    /// of seconds is drawn from [1, slot * MinDelayInSeconds) (upper bound capped at
    /// int.MaxValue) and the result is then clamped to
    /// [MinDelayInSeconds, MaxDelayInSeconds]. With no recorded errors the
    /// interval is zero.
    /// </remarks>
    /// <returns>BackOff time interval.</returns>
    internal TimeSpan GetBackOffTimeInterval()
    {
        // Snapshot once so the check and the calculation see the same value.
        int consecutiveErrors = Volatile.Read(ref _consecutiveErrors);

        double delayInSeconds = 0;
        if (consecutiveErrors > 0)
        {
            double backOffSlot = (Math.Pow(2, consecutiveErrors) - 1) / 2;
            var backOffDelay = _random.Next(1, (int)Math.Min(backOffSlot * MinDelayInSeconds, int.MaxValue));
            delayInSeconds = Math.Max(Math.Min(backOffDelay, MaxDelayInSeconds), MinDelayInSeconds);
        }

        return TimeSpan.FromSeconds(delayInSeconds);
    }

    /// <summary>
    /// Releases the back-off timer. Safe to call more than once.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>; managed resources are released only in that case.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            if (disposing)
            {
                _backOffIntervalTimer?.Dispose();
            }

            _disposed = true;
        }
    }

    /// <summary>
    /// Disposes this instance and the back-off timer it holds.
    /// </summary>
    public void Dispose()
    {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
| } | ||
43 changes: 43 additions & 0 deletions
43
...xporter/tests/Azure.Monitor.OpenTelemetry.Exporter.Tests/TransmissionStateManagerTests.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| using System; | ||
| using Azure.Core.TestFramework; | ||
| using Azure.Monitor.OpenTelemetry.Exporter.Internals; | ||
| using Xunit; | ||
|
|
||
| namespace Azure.Monitor.OpenTelemetry.Exporter.Tests | ||
| { | ||
/// <summary>
/// Unit tests for <see cref="TransmissionStateManager"/> back-off behavior.
/// </summary>
public class TransmissionStateManagerTests
{
    /// <summary>
    /// A failed response without a retry-after value must halt transmission.
    /// </summary>
    [Fact]
    public void EnableBackOffSetsStateToOpen()
    {
        // Dispose the manager so the back-off timer it starts does not outlive the test.
        using var transmissionStateManager = new TransmissionStateManager();
        MockResponse mockResponse = new MockResponse(500, "Internal Server Error");

        transmissionStateManager.EnableBackOff(mockResponse);

        Assert.Equal(TransmissionState.Open, transmissionStateManager.State);
    }

    /// <summary>
    /// When the response carries a Retry-After header, its value (in seconds)
    /// must be used as the back-off timer interval.
    /// </summary>
    [Fact]
    public void EnableBackOffSetsStateToOpenUsingRetryAfterHeaderInResponse()
    {
        System.Timers.Timer backOffIntervalTimer = new();

        // The manager disposes the timer it is given, so one using covers both.
        using var transmissionStateManager = new TransmissionStateManager(
            random: new(),
            minIntervalToUpdateConsecutiveErrors: TimeSpan.FromSeconds(20),
            nextMinTimeToUpdateConsecutiveErrors: DateTimeOffset.MinValue,
            backOffIntervalTimer: backOffIntervalTimer,
            state: TransmissionState.Closed);

        MockResponse mockResponse = new MockResponse(429, "Too Many Requests");
        mockResponse.AddHeader("Retry-After", "20");

        transmissionStateManager.EnableBackOff(mockResponse);

        Assert.Equal(20000, backOffIntervalTimer.Interval);
        Assert.Equal(TransmissionState.Open, transmissionStateManager.State);
    }
}
| } |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.