From 78694f31d167145688506e1342ea65a4e2107175 Mon Sep 17 00:00:00 2001 From: "dionisio-bot[bot]" <117394943+dionisio-bot[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 02:15:42 +0000 Subject: [PATCH] fix: app subprocess restart on error and better reporting (#34193) Co-authored-by: Guilherme Gazzo <5263975+ggazzo@users.noreply.github.com> Co-authored-by: Douglas Gubert <1810309+d-gubert@users.noreply.github.com> --- .changeset/giant-nails-trade.md | 5 ++ .changeset/honest-kings-allow.md | 5 ++ .changeset/quiet-radios-fry.md | 5 ++ .changeset/young-dots-cheat.md | 5 ++ .../deno-runtime/handlers/app/handler.ts | 56 +++++++++---------- .../apps-engine/deno-runtime/lib/messenger.ts | 4 ++ .../deno-runtime/lib/metricsCollector.ts | 20 +++++++ packages/apps-engine/deno-runtime/main.ts | 3 + .../runtime/deno/AppsEngineDenoRuntime.ts | 32 +++++++---- .../server/runtime/deno/LivenessManager.ts | 15 +++-- .../server/runtime/deno/ProcessMessenger.ts | 6 +- 11 files changed, 111 insertions(+), 45 deletions(-) create mode 100644 .changeset/giant-nails-trade.md create mode 100644 .changeset/honest-kings-allow.md create mode 100644 .changeset/quiet-radios-fry.md create mode 100644 .changeset/young-dots-cheat.md create mode 100644 packages/apps-engine/deno-runtime/lib/metricsCollector.ts diff --git a/.changeset/giant-nails-trade.md b/.changeset/giant-nails-trade.md new file mode 100644 index 0000000000000..76e92e9992472 --- /dev/null +++ b/.changeset/giant-nails-trade.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Adds simple app subprocess metrics report diff --git a/.changeset/honest-kings-allow.md b/.changeset/honest-kings-allow.md new file mode 100644 index 0000000000000..7ab8d0abc1a83 --- /dev/null +++ b/.changeset/honest-kings-allow.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Attempts to restart an app subprocess if the spawn command fails diff --git a/.changeset/quiet-radios-fry.md b/.changeset/quiet-radios-fry.md new file mode 100644 index 0000000000000..b3b7209cb0418 --- /dev/null +++ b/.changeset/quiet-radios-fry.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Fixes an issue while collecting the error message from a failed restart attempt of an app subprocess diff --git a/.changeset/young-dots-cheat.md b/.changeset/young-dots-cheat.md new file mode 100644 index 0000000000000..e8d3b6c5bff64 --- /dev/null +++ b/.changeset/young-dots-cheat.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Prevents app:getStatus requests from timing out in some cases diff --git a/packages/apps-engine/deno-runtime/handlers/app/handler.ts b/packages/apps-engine/deno-runtime/handlers/app/handler.ts index 2a44f34cb7fea..141e145df9719 100644 --- a/packages/apps-engine/deno-runtime/handlers/app/handler.ts +++ b/packages/apps-engine/deno-runtime/handlers/app/handler.ts @@ -19,41 +19,41 @@ import handleOnUpdate from './handleOnUpdate.ts'; export default async function handleApp(method: string, params: unknown): Promise { const [, appMethod] = method.split(':'); - // We don't want the getStatus method to generate logs, so we handle it separately - if (appMethod === 'getStatus') { - return handleGetStatus(); - } + try { + // We don't want the getStatus method to generate logs, so we handle it separately + if (appMethod === 'getStatus') { + return await handleGetStatus(); + } - // `app` will be undefined if the method here is "app:construct" - const app = AppObjectRegistry.get('app'); + // `app` will be undefined if the method here is "app:construct" + const app = AppObjectRegistry.get('app'); - app?.getLogger().debug(`'${appMethod}' is being called...`); + app?.getLogger().debug(`'${appMethod}' is being called...`); - if (uikitInteractions.includes(appMethod)) { - return handleUIKitInteraction(appMethod, params).then((result) => { - if (result instanceof JsonRpcError) { - app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); - } else { - app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); - } + if (uikitInteractions.includes(appMethod)) { + return handleUIKitInteraction(appMethod, params).then((result) => { + if (result instanceof JsonRpcError) { + app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); + } else { + app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); + } - return result; - }); - } + return result; + }); + } - if (appMethod.startsWith('check') || appMethod.startsWith('execute')) { - return handleListener(appMethod, params).then((result) => { - if (result instanceof JsonRpcError) { - app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); - } else { - app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); - } + if (appMethod.startsWith('check') || appMethod.startsWith('execute')) { + return handleListener(appMethod, params).then((result) => { + if (result instanceof JsonRpcError) { + app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); + } else { + app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); + } - return result; - }); - } + return result; + }); + } - try { let result: Defined | JsonRpcError; switch (appMethod) { diff --git a/packages/apps-engine/deno-runtime/lib/messenger.ts b/packages/apps-engine/deno-runtime/lib/messenger.ts index 1e9ffe05c6c55..5881d408c01ce 100644 --- a/packages/apps-engine/deno-runtime/lib/messenger.ts +++ b/packages/apps-engine/deno-runtime/lib/messenger.ts @@ -59,6 +59,10 @@ export const Queue = new (class Queue { this.queue.push(encoder.encode(message)); this.processQueue(); } + + public getCurrentSize() { + return this.queue.length; + } }); export const Transport = new (class Transporter { diff --git a/packages/apps-engine/deno-runtime/lib/metricsCollector.ts b/packages/apps-engine/deno-runtime/lib/metricsCollector.ts new file mode 100644 index 0000000000000..c257b6c8a35b6 --- /dev/null +++ b/packages/apps-engine/deno-runtime/lib/metricsCollector.ts @@ -0,0 +1,20 @@ +import { writeAll } from "https://deno.land/std@0.216.0/io/write_all.ts"; +import { Queue } from "./messenger.ts"; + +export function collectMetrics() { + return { + queueSize: Queue.getCurrentSize(), + } +}; + +const encoder = new TextEncoder(); + +export async function sendMetrics() { + const metrics = collectMetrics(); + + await writeAll(Deno.stderr, encoder.encode(JSON.stringify(metrics))); +} + +export function startMetricsReport() { + setInterval(sendMetrics, 5000); +} diff --git a/packages/apps-engine/deno-runtime/main.ts b/packages/apps-engine/deno-runtime/main.ts index fa28229089546..3983c8d524078 100644 --- a/packages/apps-engine/deno-runtime/main.ts +++ b/packages/apps-engine/deno-runtime/main.ts @@ -22,6 +22,7 @@ import apiHandler from './handlers/api-handler.ts'; import handleApp from './handlers/app/handler.ts'; import handleScheduler from './handlers/scheduler-handler.ts'; import registerErrorListeners from './error-handlers.ts'; +import { startMetricsReport } from "./lib/metricsCollector.ts"; type Handlers = { app: typeof handleApp; @@ -130,3 +131,5 @@ async function main() { registerErrorListeners(); main(); + +startMetricsReport(); diff --git a/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts b/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts index f28dbecf63bbb..75209d2e60dab 100644 --- a/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts +++ b/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts @@ -294,7 +294,7 @@ export class DenoRuntimeSubprocessController extends EventEmitter { logger.info('Successfully restarted app subprocess'); } catch (e) { - logger.error("Failed to restart app's subprocess", { error: e }); + logger.error("Failed to restart app's subprocess", { error: e.message || e }); } finally { await this.logStorage.storeEntries(AppConsole.toStorageEntry(this.getAppId(), logger)); } @@ -321,18 +321,24 @@ export class DenoRuntimeSubprocessController extends EventEmitter { } private waitUntilReady(): Promise { + if (this.state === 'ready') { + return; + } + return new Promise((resolve, reject) => { - const timeoutId = setTimeout(() => reject(new Error(`[${this.getAppId()}] Timeout: app process not ready`)), this.options.timeout); + let timeoutId: NodeJS.Timeout; - if (this.state === 'ready') { + const handler = () => { clearTimeout(timeoutId); - return resolve(); - } + resolve(); + }; - this.once('ready', () => { - clearTimeout(timeoutId); - return resolve(); - }); + timeoutId = setTimeout(() => { + this.off('ready', handler); + reject(new Error(`[${this.getAppId()}] Timeout: app process not ready`)); + }, this.options.timeout); + + this.once('ready', handler); }); } @@ -636,6 +642,12 @@ export class DenoRuntimeSubprocessController extends EventEmitter { } private async parseError(chunk: Buffer): Promise { - console.error('Subprocess stderr', chunk.toString()); + try { + const data = JSON.parse(chunk.toString()); + + this.debug('Metrics received from subprocess: %o', data); + } catch (e) { + console.error('Subprocess stderr', chunk.toString()); + } } } diff --git a/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts b/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts index dc89acc8718b8..c1aa3eda377e3 100644 --- a/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts +++ b/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts @@ -82,6 +82,7 @@ export class LivenessManager { this.controller.once('ready', () => this.ping()); this.subprocess.once('exit', this.handleExit.bind(this)); + this.subprocess.once('error', this.handleError.bind(this)); } /** @@ -155,6 +156,11 @@ export class LivenessManager { this.messenger.send(COMMAND_PING); } + private handleError(err: Error) { + this.debug('App has failed to start.`', err); + this.restartProcess(); + } + private handleExit(exitCode: number, signal: string) { this.pingAbortController.emit('abort'); @@ -174,21 +180,22 @@ export class LivenessManager { this.restartProcess(); } - private restartProcess() { + private async restartProcess() { if (this.restartCount >= this.options.maxRestarts) { this.debug('Limit of restarts reached (%d). Aborting restart...', this.options.maxRestarts); this.controller.stopApp(); return; } - this.pingTimeoutConsecutiveCount = 0; - this.restartCount++; this.restartLog.push({ restartedAt: new Date(), source: 'liveness-manager', pid: this.subprocess.pid, }); - this.controller.restartApp(); + await this.controller.restartApp(); + + this.pingTimeoutConsecutiveCount = 0; + this.restartCount++; } } diff --git a/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts b/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts index 03d03d1253237..c919adb5f0bb4 100644 --- a/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts +++ b/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts @@ -1,11 +1,11 @@ -import { ChildProcess } from 'child_process'; +import type { ChildProcess } from 'child_process'; import type { JsonRpc } from 'jsonrpc-lite'; import { encoder } from './codec'; export class ProcessMessenger { - private deno: ChildProcess; + private deno: ChildProcess | undefined; private _sendStrategy: (message: JsonRpc) => void; @@ -30,7 +30,7 @@ export class ProcessMessenger { } private switchStrategy() { - if (this.deno instanceof ChildProcess) { + if (this.deno?.stdin?.writable) { this._sendStrategy = this.strategySend.bind(this); } else { this._sendStrategy = this.strategyError.bind(this);