Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b3e78b3
Expose isRetryableEsClientError
rudolf Jul 16, 2025
aa48382
[CI] Auto-commit changed files from 'node scripts/yarn_deduplicate'
kibanamachine Jul 16, 2025
6ebe2f2
Merge remote-tracking branch 'origin' into expose-is-retryable
rudolf Aug 21, 2025
0eaa367
Expose isRetryableEsClientError from a dedicated package to avoid cir…
rudolf Aug 21, 2025
bbfc753
Adopt new core util from plugins
rudolf Aug 21, 2025
dcbdfcf
Restore test and small fixes
rudolf Aug 22, 2025
a14f6c7
[CI] Auto-commit changed files from 'node scripts/notice'
kibanamachine Aug 22, 2025
3da2d71
Merge branch 'main' into expose-is-retryable
rudolf Aug 22, 2025
6165e72
Fix test
rudolf Aug 25, 2025
565d8f3
Merge branch 'main' of github.com:elastic/kibana into expose-is-retry…
rudolf Aug 25, 2025
8b2f8cb
Merge branch 'expose-is-retryable' of github.com:rudolf/kibana into e…
rudolf Aug 25, 2025
6712b71
Merge branch 'main' into expose-is-retryable
rudolf Sep 24, 2025
262caba
[CI] Auto-commit changed files from 'node scripts/generate codeowners'
kibanamachine Sep 24, 2025
6bb974d
Merge branch 'main' into expose-is-retryable
rudolf Sep 26, 2025
e4c9562
Merge remote-tracking branch 'origin' into expose-is-retryable
rudolf Sep 29, 2025
c6f210b
Merge branch 'main' into expose-is-retryable
rudolf Oct 10, 2025
719ad16
Fix type errors
rudolf Oct 10, 2025
5ce4e4e
[CI] Auto-commit changed files from 'node scripts/yarn_deduplicate'
kibanamachine Oct 10, 2025
3b26b43
Cleanup after merge
rudolf Oct 10, 2025
bdfaa2c
Merge remote-tracking branch 'origin' into expose-is-retryable
rudolf Oct 13, 2025
c124d3d
security index-adapter now retries on 429 circuit breakers
rudolf Oct 13, 2025
d0829f4
Merge branch 'main' of github.com:elastic/kibana into expose-is-retry…
rudolf Oct 14, 2025
91b2eb9
Merge remote-tracking branch 'origin' into expose-is-retryable
rudolf Oct 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ src/core/packages/elasticsearch/client-server-mocks @elastic/kibana-core
src/core/packages/elasticsearch/server @elastic/kibana-core
src/core/packages/elasticsearch/server-internal @elastic/kibana-core
src/core/packages/elasticsearch/server-mocks @elastic/kibana-core
src/core/packages/elasticsearch/server-utils @elastic/kibana-core
src/core/packages/environment/server-internal @elastic/kibana-core
src/core/packages/environment/server-mocks @elastic/kibana-core
src/core/packages/execution-context/browser @elastic/kibana-core
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@
"@kbn/core-elasticsearch-client-server-internal": "link:src/core/packages/elasticsearch/client-server-internal",
"@kbn/core-elasticsearch-server": "link:src/core/packages/elasticsearch/server",
"@kbn/core-elasticsearch-server-internal": "link:src/core/packages/elasticsearch/server-internal",
"@kbn/core-elasticsearch-server-utils": "link:src/core/packages/elasticsearch/server-utils",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to introduce a new package to avoid circular dependencies:
server → client-server-mocks → client-server-internal → server

"@kbn/core-environment-server-internal": "link:src/core/packages/environment/server-internal",
"@kbn/core-execution-context-browser": "link:src/core/packages/execution-context/browser",
"@kbn/core-execution-context-browser-internal": "link:src/core/packages/execution-context/browser-internal",
Expand Down
1 change: 0 additions & 1 deletion src/core/packages/elasticsearch/server-internal/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,4 @@ export { CoreElasticsearchRouteHandlerContext } from './src/elasticsearch_route_
export { retryCallCluster, migrationRetryCallCluster } from './src/retry_call_cluster';
export { isInlineScriptingEnabled } from './src/is_scripting_enabled';
export { getCapabilitiesFromClient } from './src/get_capabilities';
export { isRetryableEsClientError } from './src/retryable_es_client_errors';
export type { ClusterInfo } from './src/get_cluster_info';
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

import { isRetryableEsClientErrorMock } from './is_scripting_enabled.test.mocks';
import type { estypes } from '@elastic/elasticsearch';
import { errors as esErrors } from '@elastic/elasticsearch';
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
import { isInlineScriptingEnabled } from './is_scripting_enabled';

Expand Down Expand Up @@ -98,55 +98,53 @@ describe('isInlineScriptingEnabled', () => {
});

describe('resiliency', () => {
beforeEach(() => {
isRetryableEsClientErrorMock.mockReset();
});

const mockSuccessOnce = () => {
client.cluster.getSettings.mockResolvedValueOnce({
transient: {},
persistent: {},
defaults: {},
});
};
const mockErrorOnce = () => {
client.cluster.getSettings.mockResponseImplementationOnce(() => {
throw Error('ERR CON REFUSED');
});

const mockRetryableErrorOnce = () => {
client.cluster.getSettings.mockRejectedValueOnce(
new esErrors.ConnectionError(
'Connection failed',
elasticsearchClientMock.createApiResponse()
)
);
};

it('retries the ES api call in case of retryable error', async () => {
isRetryableEsClientErrorMock.mockReturnValue(true);
const mockNonRetryableErrorOnce = () => {
client.cluster.getSettings.mockRejectedValueOnce(new Error('Non-retryable error'));
};

mockErrorOnce();
it('retries the ES api call in case of retryable error', async () => {
mockRetryableErrorOnce();
mockSuccessOnce();

await expect(isInlineScriptingEnabled({ client, maxRetryDelay: 1 })).resolves.toEqual(true);
expect(client.cluster.getSettings).toHaveBeenCalledTimes(2);
});

it('throws in case of non-retryable error', async () => {
isRetryableEsClientErrorMock.mockReturnValue(false);

mockErrorOnce();
mockNonRetryableErrorOnce();
mockSuccessOnce();

await expect(isInlineScriptingEnabled({ client, maxRetryDelay: 0.1 })).rejects.toThrowError(
'ERR CON REFUSED'
'Non-retryable error'
);
});

it('retries up to `maxRetries` times', async () => {
isRetryableEsClientErrorMock.mockReturnValue(true);

mockErrorOnce();
mockErrorOnce();
mockErrorOnce();
mockRetryableErrorOnce();
mockRetryableErrorOnce();
mockRetryableErrorOnce();
mockSuccessOnce();

await expect(
isInlineScriptingEnabled({ client, maxRetryDelay: 0.1, maxRetries: 2 })
).rejects.toThrowError('ERR CON REFUSED');
).rejects.toThrowError('Connection failed');
expect(client.cluster.getSettings).toHaveBeenCalledTimes(3);
});
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
*/

import { defer, map, retry, timer, firstValueFrom, throwError } from 'rxjs';
import { isRetryableEsClientError } from '@kbn/core-elasticsearch-server-utils';
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New behavior here is that we won't retry for 401, 403, which makes more sense as a default behavior, like you mentioned.

import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import { isRetryableEsClientError } from './retryable_es_client_errors';

const scriptAllowedTypesKey = 'script.allowed_types';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"@kbn/core-http-server-internal",
"@kbn/core-execution-context-server-internal",
"@kbn/core-elasticsearch-server",
"@kbn/core-elasticsearch-server-utils",
"@kbn/core-elasticsearch-client-server-internal",
"@kbn/core-test-helpers-deprecations-getters",
"@kbn/config",
Expand Down
3 changes: 3 additions & 0 deletions src/core/packages/elasticsearch/server-utils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# @kbn/core-elasticsearch-server-utils

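Utilities for working with Elasticsearch, such as `isRetryableEsClientError`, which tells whether an Elasticsearch client error is worth retrying.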
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,4 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

export const isRetryableEsClientErrorMock = jest.fn();

jest.doMock('./retryable_es_client_errors', () => {
return {
isRetryableEsClientError: isRetryableEsClientErrorMock,
};
});
export { isRetryableEsClientError } from './src/is_retryable_es_client_error';
14 changes: 14 additions & 0 deletions src/core/packages/elasticsearch/server-utils/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

// Jest configuration for the @kbn/core-elasticsearch-server-utils package.
module.exports = {
// Use the `@kbn/test/jest_node` preset.
preset: '@kbn/test/jest_node',
// Five levels up from src/core/packages/elasticsearch/server-utils, i.e. the repository root.
rootDir: '../../../../..',
// Directory Jest searches for this package's test files.
roots: ['<rootDir>/src/core/packages/elasticsearch/server-utils'],
};
7 changes: 7 additions & 0 deletions src/core/packages/elasticsearch/server-utils/kibana.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"type": "shared-common",
"id": "@kbn/core-elasticsearch-server-utils",
"owner": "@elastic/kibana-core",
"group": "platform",
"visibility": "shared"
}
6 changes: 6 additions & 0 deletions src/core/packages/elasticsearch/server-utils/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "@kbn/core-elasticsearch-server-utils",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0 OR AGPL-3.0-only OR SSPL-1.0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import { errors as esErrors } from '@elastic/elasticsearch';
import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks';
import { isRetryableEsClientError } from './retryable_es_client_errors';
import { isRetryableEsClientError } from './is_retryable_es_client_error';

describe('isRetryableEsClientError', () => {
describe('returns `false` for', () => {
Expand Down Expand Up @@ -51,27 +51,34 @@ describe('isRetryableEsClientError', () => {
expect(isRetryableEsClientError(error)).toEqual(true);
});

it('ResponseError of type snapshot_in_progress_exception', () => {
it.each([503, 504, 408, 410, 429])('ResponseError with %p status code', (statusCode) => {
const error = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
body: { error: { type: 'snapshot_in_progress_exception' } },
statusCode,
body: { error: { type: 'reason' } },
})
);

expect(isRetryableEsClientError(error)).toEqual(true);
});

it.each([503, 504, 401, 403, 408, 410, 429])(
'ResponseError with %p status code',
(statusCode) => {
const error = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
statusCode,
body: { error: { type: 'reason' } },
})
);
it('custom response status codes', () => {
const retryableError = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
statusCode: 418, // I'm a retryable teapot
body: { error: { type: 'reason' } },
})
);

expect(isRetryableEsClientError(error)).toEqual(true);
}
);
const nonRetryableError = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
statusCode: 503, // 503 is retryable by default but not in our custom retry codes
body: { error: { type: 'reason' } },
})
);

expect(isRetryableEsClientError(retryableError, [418])).toEqual(true);
expect(isRetryableEsClientError(nonRetryableError, [418])).toEqual(false);
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@

import { errors as EsErrors } from '@elastic/elasticsearch';

const retryResponseStatuses = [
401, // AuthorizationException
403, // AuthenticationException
const DEFAULT_RETRY_STATUS_CODES = [
408, // RequestTimeout
410, // Gone
429, // TooManyRequests -> ES circuit breaker
Expand All @@ -21,20 +19,32 @@ const retryResponseStatuses = [

/**
* Returns true if the given elasticsearch error should be retried
* by retry-based resiliency systems such as the SO migration, false otherwise.
*
* Retryable errors include:
* - NoLivingConnectionsError
* - ConnectionError
* - TimeoutError
* - ResponseError with status codes:
* - 408 RequestTimeout
* - 410 Gone
* - 429 TooManyRequests (ES circuit breaker)
* - 503 ServiceUnavailable
* - 504 GatewayTimeout
* - OR custom status codes if provided
* @param e The error to check
* @param customRetryStatusCodes Custom response status codes to consider as retryable
* @returns true if the error is retryable, false otherwise
*/
export const isRetryableEsClientError = (e: EsErrors.ElasticsearchClientError): boolean => {
export const isRetryableEsClientError = (
e: EsErrors.ElasticsearchClientError,
customRetryStatusCodes?: number[]
): boolean => {
if (
e instanceof EsErrors.NoLivingConnectionsError ||
e instanceof EsErrors.ConnectionError ||
e instanceof EsErrors.TimeoutError ||
(e instanceof EsErrors.ResponseError &&
(retryResponseStatuses.includes(e?.statusCode!) ||
// ES returns a 400 Bad Request when trying to close or delete an
// index while snapshots are in progress. This should have been a 503
// so once https://github.com/elastic/elasticsearch/issues/65883 is
// fixed we can remove this.
e?.body?.error?.type === 'snapshot_in_progress_exception'))
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

snapshot_in_progress_exception no longer blocks other API calls

(customRetryStatusCodes ?? DEFAULT_RETRY_STATUS_CODES).includes(e?.statusCode!))
) {
return true;
}
Expand Down
19 changes: 19 additions & 0 deletions src/core/packages/elasticsearch/server-utils/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"extends": "../../../../../tsconfig.base.json",
"compilerOptions": {
"outDir": "target/types",
"types": [
"jest",
"node"
]
},
"include": [
"**/*.ts"
],
"kbn_references": [
"@kbn/core-elasticsearch-client-server-mocks"
],
"exclude": [
"target/**/*",
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,6 @@ describe('catchRetryableEsClientErrors', () => {
type: 'retryable_es_client_error',
});
});
it('ResponseError of type snapshot_in_progress_exception', async () => {
const error = new esErrors.ResponseError(
elasticsearchClientMock.createApiResponse({
body: { error: { type: 'snapshot_in_progress_exception' } },
})
);
expect(
((await Promise.reject(error).catch(catchRetryableEsClientErrors)) as any).left
).toMatchObject({
message: 'snapshot_in_progress_exception',
type: 'retryable_es_client_error',
});
});
it.each([503, 401, 403, 408, 410, 429])(
'ResponseError with retryable status code (%d)',
async (status) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,30 @@

import * as Either from 'fp-ts/Either';
import type { errors as EsErrors } from '@elastic/elasticsearch';
import { isRetryableEsClientError } from '@kbn/core-elasticsearch-server-internal';
import { isRetryableEsClientError } from '@kbn/core-elasticsearch-server-utils';

export interface RetryableEsClientError {
type: 'retryable_es_client_error';
message: string;
error?: Error;
}

// Migrations also retry on Auth exceptions as this is a common failure for newly created
// clusters that might have misconfigured credentials.
const retryResponseStatuses = [
401, // AuthenticationException (Unauthorized)
403, // AuthorizationException (Forbidden)
Comment on lines +23 to +24
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The shared function doesn't retry on auth errors, since they are usually not transient.

Copy link
Copy Markdown
Member

@gsoldevila gsoldevila Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should populate the ones below by spreading the default const, `...DEFAULT_RETRY_STATUS_CODES` (that would require exposing the const 🤔).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or in other words, is there a scenario where it does not make sense to retry on any of the values below? I'm asking because if we add a new status code to our DEFAULT list in the future, we might forget to also add it to the "custom" lists.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option would be to use an incremental approach:

{
  alsoRetryOn: [401, 403],
  doNotRetryOn: [xxx, yyy],
}

this way users can automatically benefit from updates in the DEFAULT_RETRY_STATUS_CODES list, and they can add their custom exceptions.

408, // RequestTimeout
410, // Gone
429, // TooManyRequests -> ES circuit breaker
503, // ServiceUnavailable
504, // GatewayTimeout
];

export const catchRetryableEsClientErrors = (
e: EsErrors.ElasticsearchClientError
): Either.Either<RetryableEsClientError, never> => {
if (isRetryableEsClientError(e)) {
if (isRetryableEsClientError(e, retryResponseStatuses)) {
return Either.left({
type: 'retryable_es_client_error' as const,
message: e?.message,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"@kbn/std",
"@kbn/core-doc-links-server",
"@kbn/core-elasticsearch-server",
"@kbn/core-elasticsearch-server-utils",
"@kbn/core-elasticsearch-client-server-internal",
"@kbn/core-saved-objects-common",
"@kbn/core-saved-objects-server",
Expand Down
2 changes: 2 additions & 0 deletions tsconfig.base.json
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,8 @@
"@kbn/core-elasticsearch-server-internal/*": ["src/core/packages/elasticsearch/server-internal/*"],
"@kbn/core-elasticsearch-server-mocks": ["src/core/packages/elasticsearch/server-mocks"],
"@kbn/core-elasticsearch-server-mocks/*": ["src/core/packages/elasticsearch/server-mocks/*"],
"@kbn/core-elasticsearch-server-utils": ["src/core/packages/elasticsearch/server-utils"],
"@kbn/core-elasticsearch-server-utils/*": ["src/core/packages/elasticsearch/server-utils/*"],
"@kbn/core-environment-server-internal": ["src/core/packages/environment/server-internal"],
"@kbn/core-environment-server-internal/*": ["src/core/packages/environment/server-internal/*"],
"@kbn/core-environment-server-mocks": ["src/core/packages/environment/server-mocks"],
Expand Down
Loading