diff --git a/config/serverless.yml b/config/serverless.yml index 33eaec2b22b6a..8319d4c0ecee9 100644 --- a/config/serverless.yml +++ b/config/serverless.yml @@ -7,6 +7,7 @@ xpack.fleet.internal.disableILMPolicies: true xpack.fleet.internal.disableProxies: true xpack.fleet.internal.activeAgentsSoftLimit: 25000 xpack.fleet.internal.onlyAllowAgentUpgradeToKnownVersions: true +xpack.fleet.internal.retrySetupOnBoot: true # Cloud links xpack.cloud.base_url: 'https://cloud.elastic.co' diff --git a/package.json b/package.json index 3b7f8c030fc8f..5ae2f7ff60c3d 100644 --- a/package.json +++ b/package.json @@ -887,6 +887,7 @@ "email-addresses": "^5.0.0", "execa": "^5.1.1", "expiry-js": "0.1.7", + "exponential-backoff": "^3.1.1", "extract-zip": "^2.0.1", "fast-deep-equal": "^3.1.1", "fflate": "^0.6.9", diff --git a/x-pack/plugins/fleet/common/types/index.ts b/x-pack/plugins/fleet/common/types/index.ts index 47f62c8e19794..6ef34d045e20f 100644 --- a/x-pack/plugins/fleet/common/types/index.ts +++ b/x-pack/plugins/fleet/common/types/index.ts @@ -51,6 +51,7 @@ export interface FleetConfigType { fleetServerStandalone: boolean; onlyAllowAgentUpgradeToKnownVersions: boolean; activeAgentsSoftLimit?: number; + retrySetupOnBoot: boolean; registry: { kibanaVersionCheckEnabled: boolean; capabilities: string[]; diff --git a/x-pack/plugins/fleet/server/config.ts b/x-pack/plugins/fleet/server/config.ts index b68684460bf81..f1210a49f7f0c 100644 --- a/x-pack/plugins/fleet/server/config.ts +++ b/x-pack/plugins/fleet/server/config.ts @@ -188,6 +188,7 @@ export const config: PluginConfigDescriptor = { min: 0, }) ), + retrySetupOnBoot: schema.boolean({ defaultValue: false }), registry: schema.object( { kibanaVersionCheckEnabled: schema.boolean({ defaultValue: true }), diff --git a/x-pack/plugins/fleet/server/plugin.ts b/x-pack/plugins/fleet/server/plugin.ts index 9603eb2b47064..e0aa5315d8ff9 100644 --- a/x-pack/plugins/fleet/server/plugin.ts +++ b/x-pack/plugins/fleet/server/plugin.ts @@ -5,6 +5,7 @@ * 2.0. */ +import { backOff } from 'exponential-backoff'; import type { Observable } from 'rxjs'; import { BehaviorSubject } from 'rxjs'; import { take, filter } from 'rxjs/operators'; @@ -532,9 +533,39 @@ export class FleetPlugin ) .toPromise(); - await setupFleet( - new SavedObjectsClient(core.savedObjects.createInternalRepository()), - core.elasticsearch.client.asInternalUser + // Retry Fleet setup w/ backoff + await backOff( + async () => { + await setupFleet( + new SavedObjectsClient(core.savedObjects.createInternalRepository()), + core.elasticsearch.client.asInternalUser + ); + }, + { + // We only retry when this feature flag is enabled + numOfAttempts: this.configInitialValue.internal?.retrySetupOnBoot ? Infinity : 1, + // 250ms initial backoff + startingDelay: 250, + // 5m max backoff + maxDelay: 60000 * 5, + timeMultiple: 2, + // avoid HA contention with other Kibana instances + jitter: 'full', + retry: (error: any, attemptCount: number) => { + const summary = `Fleet setup attempt ${attemptCount} failed, will retry after backoff`; + logger.debug(summary, { error: { message: error } }); + + this.fleetStatus$.next({ + level: ServiceStatusLevels.available, + summary, + meta: { + attemptCount, + error, + }, + }); + return true; + }, + } ); this.fleetStatus$.next({ @@ -542,8 +573,7 @@ export class FleetPlugin summary: 'Fleet is available', }); } catch (error) { - logger.warn('Fleet setup failed'); - logger.warn(error); + logger.warn('Fleet setup failed', { error: { message: error } }); this.fleetStatus$.next({ // As long as Fleet has a dependency on EPR, we can't reliably set Kibana status to `unavailable` here. diff --git a/x-pack/plugins/fleet/server/services/epm/packages/_install_package.test.ts b/x-pack/plugins/fleet/server/services/epm/packages/_install_package.test.ts index b7fe0d95310ef..af3460e266af1 100644 --- a/x-pack/plugins/fleet/server/services/epm/packages/_install_package.test.ts +++ b/x-pack/plugins/fleet/server/services/epm/packages/_install_package.test.ts @@ -135,6 +135,7 @@ describe('_installPackage', () => { disableProxies: false, fleetServerStandalone: false, onlyAllowAgentUpgradeToKnownVersions: false, + retrySetupOnBoot: false, registry: { kibanaVersionCheckEnabled: true, capabilities: [], @@ -192,6 +193,7 @@ describe('_installPackage', () => { disableILMPolicies: false, fleetServerStandalone: false, onlyAllowAgentUpgradeToKnownVersions: false, + retrySetupOnBoot: false, registry: { kibanaVersionCheckEnabled: true, capabilities: [], @@ -265,6 +267,7 @@ describe('_installPackage', () => { disableProxies: false, fleetServerStandalone: false, onlyAllowAgentUpgradeToKnownVersions: false, + retrySetupOnBoot: false, registry: { kibanaVersionCheckEnabled: true, capabilities: [],