From 71e96b40c26826bbbc70468e24c0466194ca52d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Louv-Jansen?= Date: Wed, 2 Apr 2025 10:54:19 +0200 Subject: [PATCH 1/2] [Obs AI Assistant] Retry `release` to fix flaky tests --- .../service/distributed_lock_manager/lock_manager_client.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts index 18412a17bde30..dc5e932f4e3f9 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts @@ -140,6 +140,7 @@ export class LockManager { const response = await this.esClient.update({ index: LOCKS_CONCRETE_INDEX_NAME, id: this.lockId, + retry_on_conflict: 3, // Retry on version conflict. This can necessary if `extendTtl` is called at the same time. scripted_upsert: false, script: { lang: 'painless', From b2e4f6bb48696a87b791f7d656562bc772931b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Louv-Jansen?= Date: Wed, 2 Apr 2025 11:36:00 +0200 Subject: [PATCH 2/2] =?UTF-8?q?Don=E2=80=99t=20release=20until=20extend=20?= =?UTF-8?q?has=20finished?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../lock_manager_client.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts index dc5e932f4e3f9..8fedfd81d978e 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/distributed_lock_manager/lock_manager_client.ts @@ -140,7 +140,6 @@ export class LockManager { const response = await this.esClient.update({ index: LOCKS_CONCRETE_INDEX_NAME, id: this.lockId, - retry_on_conflict: 3, // Retry on version conflict. This can necessary if `extendTtl` is called at the same time. scripted_upsert: false, script: { lang: 'painless', @@ -286,20 +285,26 @@ export async function withLock( `Lock "${lockId}" acquired. Extending TTL every ${prettyMilliseconds(extendInterval)}` ); + let extendTTlPromise = Promise.resolve(true); const intervalId = setInterval(() => { - lockManager.extendTtl().catch((err) => { - logger.error(`Failed to extend lock "${lockId}":`, err); - }); + // wait for the previous extendTtl request to finish before sending the next one. This is to avoid flooding ES with extendTtl requests in cases where ES is slow to respond. + extendTTlPromise = extendTTlPromise + .then(() => lockManager.extendTtl()) + .catch((err) => { + logger.error(`Failed to extend lock "${lockId}":`, err); + return false; + }); }, extendInterval); try { return await callback(); } finally { - clearInterval(intervalId); try { + clearInterval(intervalId); + await extendTTlPromise; await lockManager.release(); } catch (error) { - logger.error(`Failed to release lock "${lockId}": ${error.message}`); + logger.error(`Failed to release lock "${lockId}" in withLock: ${error.message}`); } } }