diff --git a/experimental/CHANGELOG.md b/experimental/CHANGELOG.md index a6dcfd65e38..31bb5a12327 100644 --- a/experimental/CHANGELOG.md +++ b/experimental/CHANGELOG.md @@ -22,6 +22,7 @@ For notes on migrating to 2.x / 0.200.x see [the upgrade guide](doc/upgrade-to-2 * feat(opentelemetry-sdk-node): set instrumentation and propagators for experimental start [#6148](https://github.com/open-telemetry/opentelemetry-js/pull/6148) @maryliag * refactor(configuration): set console exporter as empty object [#6164](https://github.com/open-telemetry/opentelemetry-js/pull/6164) @maryliag * feat(instrumentation-http, instrumentation-fetch, instrumentation-xml-http-request): support "QUERY" as a known HTTP method +* feat(otlp-exporter-base): ensure that we retry for at least 80% of specified user timeout [#6260](https://github.com/open-telemetry/opentelemetry-js/pull/6260) @jsokol805 ### :bug: Bug Fixes diff --git a/experimental/packages/otlp-exporter-base/src/retrying-transport.ts b/experimental/packages/otlp-exporter-base/src/retrying-transport.ts index 0d5e4184e5c..58c06235225 100644 --- a/experimental/packages/otlp-exporter-base/src/retrying-transport.ts +++ b/experimental/packages/otlp-exporter-base/src/retrying-transport.ts @@ -18,9 +18,10 @@ import { IExporterTransport } from './exporter-transport'; import { ExportResponse } from './export-response'; import { diag } from '@opentelemetry/api'; -const MAX_ATTEMPTS = 5; const INITIAL_BACKOFF = 1000; -const MAX_BACKOFF = 5000; +const MINIMAL_MAX_BACKOFF = 5000; +const MAX_BACKOFF_MULTIPLIER = 0.2; +const MAX_ATTEMPTS = 20; const BACKOFF_MULTIPLIER = 1.5; const JITTER = 0.2; @@ -53,6 +54,10 @@ class RetryingTransport implements IExporterTransport { async send(data: Uint8Array, timeoutMillis: number): Promise { let attempts = MAX_ATTEMPTS; let nextBackoff = INITIAL_BACKOFF; + const maxBackoff = Math.max( + MINIMAL_MAX_BACKOFF, + MAX_BACKOFF_MULTIPLIER * timeoutMillis + ); const deadline = Date.now() + timeoutMillis; let result = await this._transport.send(data, timeoutMillis); @@ -62,7 +67,7 @@ class RetryingTransport implements IExporterTransport { // use maximum of computed backoff and 0 to avoid negative timeouts const backoff = Math.max( - Math.min(nextBackoff * (1 + getJitter()), MAX_BACKOFF), + Math.min(nextBackoff * (1 + getJitter()), maxBackoff), 0 ); nextBackoff = nextBackoff * BACKOFF_MULTIPLIER; diff --git a/experimental/packages/otlp-exporter-base/test/common/retrying-transport.test.ts b/experimental/packages/otlp-exporter-base/test/common/retrying-transport.test.ts index fe3c2a662e4..0ff0ef02b8c 100644 --- a/experimental/packages/otlp-exporter-base/test/common/retrying-transport.test.ts +++ b/experimental/packages/otlp-exporter-base/test/common/retrying-transport.test.ts @@ -24,6 +24,10 @@ const timeoutMillis = 1000000; describe('RetryingTransport', function () { describe('send', function () { + afterEach(function () { + sinon.restore(); + }); + it('does not retry when underlying transport succeeds', async function () { // arrange const expectedResponse: ExportResponse = { @@ -179,15 +183,14 @@ describe('RetryingTransport', function () { ); }); - it('does retry 5 times, then resolves as retryable', async function () { + it('stops retrying when timeout is exhausted', async function () { // arrange - // make random return a negative value so that what's passed to setTimeout() is negative and therefore gets executed immediately. - Math.random = sinon.stub().returns(-Infinity); + const clock = sinon.useFakeTimers(); + const testTimeout = 10000; // 10 seconds const retryResponse: ExportResponse = { status: 'retryable', }; - const mockData = Uint8Array.from([1, 2, 3]); const transportStubs = { @@ -198,20 +201,30 @@ describe('RetryingTransport', function () { const transport = createRetryingTransport({ transport: mockTransport }); // act - const result = await transport.send(mockData, timeoutMillis); + let resolved = false; + let result: ExportResponse | undefined; + transport.send(mockData, testTimeout).then(r => { + resolved = true; + result = r; + }); + + while (!resolved) { + await clock.tickAsync(100); + } // assert - sinon.assert.callCount(transportStubs.send, 6); // 1 initial try and 5 retries - sinon.assert.alwaysCalledWithMatch( - transportStubs.send, - mockData, - sinon.match.number.and( - sinon.match(value => { - return value <= timeoutMillis; - }) - ) + assert.strictEqual(result!.status, 'retryable'); + // At least one of these conditions caused the stop: + // - timeout exhausted, OR + // - max attempts reached + assert.ok( + transportStubs.send.callCount > 1, + `Should have retried at least once` + ); + assert.ok( + transportStubs.send.callCount <= 21, // initial + 20 retries + `Should not exceed max attempts` ); - assert.strictEqual(result, retryResponse); }); it('does not retry when retryInMillis takes place after timeoutMillis', async function () { @@ -242,5 +255,48 @@ describe('RetryingTransport', function () { ); assert.strictEqual(result, retryResponse); }); + + it('uses at least 80% of timeout duration for retries', async function () { + // arrange + const clock = sinon.useFakeTimers(); + const testTimeout = 60000; // 60 seconds + + const retryResponse: ExportResponse = { + status: 'retryable', + }; + const mockData = Uint8Array.from([1, 2, 3]); + + const transportStubs = { + send: sinon.stub().callsFake(() => { + return Promise.resolve(retryResponse); + }), + shutdown: sinon.stub(), + }; + const mockTransport = transportStubs; + const transport = createRetryingTransport({ transport: mockTransport }); + + // act + let resolved = false; + const resultPromise = transport.send(mockData, testTimeout).then(r => { + resolved = true; + return r; + }); + + // Advance time in small increments, allowing promises to resolve between ticks + while (!resolved) { + await clock.tickAsync(1000); + } + + await resultPromise; + + // assert + const timeUsed = clock.now; + const timeUsedThreshold = 0.8 * testTimeout; + + assert.ok( + timeUsed >= timeUsedThreshold, + `Expected to use at least ${timeUsedThreshold}, but only used ${timeUsed}ms (timeout: ${testTimeout}ms)` + ); + }); }); });