diff --git a/.buildkite/ftr_platform_stateful_configs.yml b/.buildkite/ftr_platform_stateful_configs.yml index f64d8d0b5c1f1..52baaf124312f 100644 --- a/.buildkite/ftr_platform_stateful_configs.yml +++ b/.buildkite/ftr_platform_stateful_configs.yml @@ -166,6 +166,7 @@ enabled: - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group3/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group4/config_with_schedule_circuit_breaker.ts + - x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/actions/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/action_task_params/config.ts - x-pack/platform/test/alerting_api_integration/spaces_only/tests/actions/connector_types/stack/email_recipient_allowlist/config.ts diff --git a/x-pack/platform/plugins/shared/alerting/server/application/rule/methods/create/create_rule.test.ts b/x-pack/platform/plugins/shared/alerting/server/application/rule/methods/create/create_rule.test.ts index 28702637a84f8..20f11a89cee9f 100644 --- a/x-pack/platform/plugins/shared/alerting/server/application/rule/methods/create/create_rule.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/application/rule/methods/create/create_rule.test.ts @@ -517,6 +517,7 @@ describe('create()', () => { "metrics": Object { "duration": 0, "gap_duration_s": null, + "gap_range": null, "total_alerts_created": null, "total_alerts_detected": null, "total_indexing_duration_ms": null, @@ -754,6 +755,7 @@ describe('create()', () => { "metrics": Object { "duration": 0, "gap_duration_s": null, + "gap_range": null, "total_alerts_created": null, "total_alerts_detected": null, "total_indexing_duration_ms": null, @@ -1489,8 +1491,7 @@ describe('create()', () => { metrics: { duration: 0, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, total_alerts_created: null, total_alerts_detected: null, total_indexing_duration_ms: null, @@ -2642,8 +2643,7 @@ describe('create()', () => { metrics: { duration: 0, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, total_alerts_created: null, total_alerts_detected: null, total_indexing_duration_ms: null, diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/alerting_event_logger/alerting_event_logger.ts b/x-pack/platform/plugins/shared/alerting/server/lib/alerting_event_logger/alerting_event_logger.ts index 32186be7ea395..d936d89c26fb2 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/alerting_event_logger/alerting_event_logger.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/alerting_event_logger/alerting_event_logger.ts @@ -6,6 +6,7 @@ */ import * as uuid from 'uuid'; +import { set } from '@kbn/safer-lodash-set'; import type { IEvent, IEventLogger, InternalFields } from '@kbn/event-log-plugin/server'; import { millisToNanos, SAVED_OBJECT_REL_PRIMARY } from '@kbn/event-log-plugin/server'; import type { BulkResponse } from '@elastic/elasticsearch/lib/api/types'; @@ -13,7 +14,11 @@ import { EVENT_LOG_ACTIONS } from '../../plugin'; import type { UntypedNormalizedRuleType } from '../../rule_type_registry'; import { RULE_SAVED_OBJECT_TYPE } from '../../saved_objects'; import type { TaskRunnerTimings } from '../../task_runner/task_runner_timer'; -import type { AlertInstanceState, RuleExecutionStatus } from '../../types'; +import type { + AlertInstanceState, + ConsumerExecutionMetrics, + RuleExecutionStatus, +} from '../../types'; import { createAlertEventLogRecordObject } from '../create_alert_event_log_record_object'; import type { RuleRunMetrics } from '../rule_run_metrics_store'; import { Gap } from '../rule_gaps/gap'; @@ -24,6 +29,7 @@ const Millis2Nanos = 1000 * 1000; export interface RuleContext { id: string; + uuid?: string; type: UntypedNormalizedRuleType; consumer?: string; name?: string; @@ -58,6 +64,7 @@ interface DoneOpts { timings?: TaskRunnerTimings; status?: RuleExecutionStatus; metrics?: RuleRunMetrics | null; + consumerMetrics?: Partial | null; backfill?: BackfillOpts; } @@ -214,12 +221,14 @@ export class AlertingEventLogger { public addOrUpdateRuleData({ name, id, + uuid: ruleUuid, consumer, type, revision, }: { name?: string; id?: string; + uuid?: string; consumer?: string; revision?: number; type?: UntypedNormalizedRuleType; @@ -236,6 +245,10 @@ export class AlertingEventLogger { }; } + if (ruleUuid) { + this.ruleData.uuid = ruleUuid; + } + if (name) { this.ruleData.name = name; } @@ -266,6 +279,7 @@ export class AlertingEventLogger { updateEventWithRuleData(this.event, { ruleName: name, ruleId: id, + ruleUuid, ruleType: type, consumer, revision, @@ -345,7 +359,7 @@ export class AlertingEventLogger { ); } - public done({ status, metrics, timings, backfill }: DoneOpts) { + public done({ status, metrics, consumerMetrics, timings, backfill }: DoneOpts) { if (!this.isInitialized || !this.event || !this.context) { throw new Error('AlertingEventLogger not initialized'); } @@ -385,6 +399,10 @@ export class AlertingEventLogger { updateEvent(this.event, { metrics }); } + if (consumerMetrics) { + updateEvent(this.event, { consumerMetrics }); + } + if (timings) { updateEvent(this.event, { timings }); } @@ -607,6 +625,7 @@ interface UpdateEventOpts { status?: string; reason?: string; metrics?: RuleRunMetrics; + consumerMetrics?: Partial; timings?: TaskRunnerTimings; backfill?: BackfillOpts; maintenanceWindowIds?: string[]; @@ -615,6 +634,7 @@ interface UpdateEventOpts { interface UpdateRuleOpts { ruleName?: string; ruleId?: string; + ruleUuid?: string; consumer?: string; ruleType?: UntypedNormalizedRuleType; revision?: number; @@ -622,7 +642,7 @@ interface UpdateRuleOpts { } export function updateEventWithRuleData(event: IEvent, opts: UpdateRuleOpts) { - const { ruleName, ruleId, consumer, ruleType, revision, savedObjects } = opts; + const { ruleName, ruleId, ruleUuid, consumer, ruleType, revision, savedObjects } = opts; if (!event) { throw new Error('Cannot update event because it is not initialized.'); } @@ -641,6 +661,13 @@ export function updateEventWithRuleData(event: IEvent, opts: UpdateRuleOpts) { }; } + if (ruleUuid) { + event.rule = { + ...event.rule, + uuid: ruleUuid, + }; + } + if (consumer) { event.kibana = event.kibana || {}; event.kibana.alert = event.kibana.alert || {}; @@ -673,7 +700,8 @@ export function updateEventWithRuleData(event: IEvent, opts: UpdateRuleOpts) { } } - if (revision) { + // revision is a non-negative integer. We'd like to capture 0 as well. + if (revision !== undefined) { event.kibana = event.kibana || {}; event.kibana.alert = event.kibana.alert || {}; event.kibana.alert.rule = event.kibana.alert.rule || {}; @@ -700,6 +728,7 @@ export function updateEvent(event: IEvent, opts: UpdateEventOpts) { status, reason, metrics, + consumerMetrics, timings, alertingOutcome, backfill, @@ -764,6 +793,19 @@ export function updateEvent(event: IEvent, opts: UpdateEventOpts) { }; } + if (consumerMetrics) { + set(event, 'kibana.alert.rule.execution.metrics', { + ...event.kibana?.alert?.rule?.execution?.metrics, + alerts_candidate_count: consumerMetrics.alerts_candidate_count, + alerts_suppressed_count: consumerMetrics.alerts_suppressed_count, + frozen_indices_queried_count: consumerMetrics.frozen_indices_queried_count, + total_indexing_duration_ms: consumerMetrics.total_indexing_duration_ms, + total_enrichment_duration_ms: consumerMetrics.total_enrichment_duration_ms, + execution_gap_duration_s: consumerMetrics.gap_duration_s, + gap_range: consumerMetrics.gap_range, + }); + } + if (backfill) { event.kibana = event.kibana || {}; event.kibana.alert = event.kibana.alert || {}; diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.test.ts b/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.test.ts index 23509465679ef..e3893bf8123c4 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.test.ts @@ -107,8 +107,7 @@ describe('resetMonitoringLastRun', () => { total_alerts_detected: null, total_alerts_created: null, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, }); }); diff --git a/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.ts b/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.ts index b691cab14ec63..0714938e54f70 100644 --- a/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.ts +++ b/x-pack/platform/plugins/shared/alerting/server/lib/monitoring.ts @@ -21,8 +21,7 @@ const INITIAL_LAST_RUN_METRICS: RuleMonitoringLastRunMetrics = { total_alerts_detected: null, total_alerts_created: null, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, }; export const getDefaultMonitoring = (timestamp: string): RawRuleMonitoring => { diff --git a/x-pack/platform/plugins/shared/alerting/server/mocks.ts b/x-pack/platform/plugins/shared/alerting/server/mocks.ts index e672e95de8996..713f68ef99a5a 100644 --- a/x-pack/platform/plugins/shared/alerting/server/mocks.ts +++ b/x-pack/platform/plugins/shared/alerting/server/mocks.ts @@ -133,11 +133,9 @@ const createAbortableSearchServiceMock = () => { const createRuleMonitoringServiceMock = () => { const mock = lazyObject({ - setLastRunMetricsTotalSearchDurationMs: jest.fn(), - setLastRunMetricsTotalIndexingDurationMs: jest.fn(), - setLastRunMetricsTotalAlertsDetected: jest.fn(), - setLastRunMetricsTotalAlertsCreated: jest.fn(), - setLastRunMetricsGapDurationS: jest.fn(), + setMetric: jest.fn(), + setMetrics: jest.fn(), + clearGapRange: jest.fn(), }) as unknown as jest.Mocked; return mock; diff --git a/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.mock.ts b/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.mock.ts index 9fc3d44ef1e9b..e428005871ece 100644 --- a/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.mock.ts +++ b/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.mock.ts @@ -8,8 +8,9 @@ function createRuleMonitoringServiceMock() { return jest.fn().mockImplementation(() => { return { + addFrameworkMetrics: jest.fn(), addHistory: jest.fn(), - getLastRunMetricsSetters: jest.fn(), + getSetters: jest.fn(), getMonitoring: jest.fn(), setLastRunMetricsDuration: jest.fn(), setMonitoring: jest.fn(), @@ -20,12 +21,9 @@ function createRuleMonitoringServiceMock() { function createPublicRuleMonitoringServiceMock() { return jest.fn().mockImplementation(() => { return { - setLastRunMetricsGapDurationS: jest.fn(), - setLastRunMetricsTotalAlertsCreated: jest.fn(), - setLastRunMetricsTotalAlertsDetected: jest.fn(), - setLastRunMetricsTotalIndexingDurationMs: jest.fn(), - setLastRunMetricsTotalSearchDurationMs: jest.fn(), - setLastRunMetricsGapRange: jest.fn(), + setMetric: jest.fn(), + setMetrics: jest.fn(), + clearGapRange: jest.fn(), }; }); } diff --git a/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.test.ts b/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.test.ts index decefd779e461..36edf18509962 100644 --- a/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.test.ts @@ -95,6 +95,10 @@ describe('RuleMonitoringService', () => { timestamp: mockNow, metrics: { duration: 100000, + gap_duration_s: null, + gap_range: null, + total_indexing_duration_ms: null, + total_search_duration_ms: null, }, }, }, @@ -103,90 +107,110 @@ describe('RuleMonitoringService', () => { expect(ruleMonitoringService.getMonitoring()).toEqual(customMonitoring); }); - it('should set totalSearchDurationMs', () => { + it('should set totalIndexDurationMs', () => { const ruleMonitoringService = new RuleMonitoringService(); - const { setLastRunMetricsTotalSearchDurationMs } = - ruleMonitoringService.getLastRunMetricsSetters(); - setLastRunMetricsTotalSearchDurationMs(123); + const { setMetric } = ruleMonitoringService.getSetters(); + + setMetric('total_indexing_duration_ms', 234); const { run: { last_run: { metrics }, }, } = ruleMonitoringService.getMonitoring(); - expect(metrics.total_search_duration_ms).toEqual(123); + expect(metrics.total_indexing_duration_ms).toEqual(234); }); - it('should set totalIndexDurationMs', () => { + it('should set gapDurationS', () => { const ruleMonitoringService = new RuleMonitoringService(); - const { setLastRunMetricsTotalIndexingDurationMs } = - ruleMonitoringService.getLastRunMetricsSetters(); - setLastRunMetricsTotalIndexingDurationMs(234); + const { setMetric } = ruleMonitoringService.getSetters(); + + setMetric('gap_duration_s', 567); const { run: { last_run: { metrics }, }, } = ruleMonitoringService.getMonitoring(); - expect(metrics.total_indexing_duration_ms).toEqual(234); + expect(metrics.gap_duration_s).toEqual(567); }); - it('should set totalAlertsDetected', () => { + it('should set gapRange', () => { const ruleMonitoringService = new RuleMonitoringService(); - const { setLastRunMetricsTotalAlertsDetected } = - ruleMonitoringService.getLastRunMetricsSetters(); - setLastRunMetricsTotalAlertsDetected(345); + const { setMetric } = ruleMonitoringService.getSetters(); + + setMetric('gap_range', { + gte: '2020-01-01T00:00:00.000Z', + lte: '2020-01-01T01:00:00.000Z', + }); const { run: { last_run: { metrics }, }, } = ruleMonitoringService.getMonitoring(); - expect(metrics.total_alerts_detected).toEqual(345); + expect(metrics.gap_range?.gte).toEqual('2020-01-01T00:00:00.000Z'); + expect(metrics.gap_range?.lte).toEqual('2020-01-01T01:00:00.000Z'); }); - it('should set totalAlertsCreated', () => { + it('should clear the previously set gapRange', () => { const ruleMonitoringService = new RuleMonitoringService(); - const { setLastRunMetricsTotalAlertsCreated } = - ruleMonitoringService.getLastRunMetricsSetters(); - setLastRunMetricsTotalAlertsCreated(456); + const { setMetric, clearGapRange } = ruleMonitoringService.getSetters(); + + setMetric('gap_range', { + gte: '2020-01-01T00:00:00.000Z', + lte: '2020-01-01T01:00:00.000Z', + }); const { run: { - last_run: { metrics }, + last_run: { metrics: metricBeforeClearing }, }, } = ruleMonitoringService.getMonitoring(); - expect(metrics.total_alerts_created).toEqual(456); - }); - it('should set gapDurationS', () => { - const ruleMonitoringService = new RuleMonitoringService(); - const { setLastRunMetricsGapDurationS } = ruleMonitoringService.getLastRunMetricsSetters(); - setLastRunMetricsGapDurationS(567); + expect(metricBeforeClearing.gap_range).toBeDefined(); + + clearGapRange(); const { run: { - last_run: { metrics }, + last_run: { metrics: metricsAfterClearing }, }, } = ruleMonitoringService.getMonitoring(); - expect(metrics.gap_duration_s).toEqual(567); + + expect(metricsAfterClearing.gap_range).toBeNull(); }); + }); - it('should set gapRange', () => { + describe('addFrameworkMetrics', () => { + it('exposes total_search_duration_ms via getMonitoring() for the rule saved object', () => { const ruleMonitoringService = new RuleMonitoringService(); - const { setLastRunMetricsGapRange } = ruleMonitoringService.getLastRunMetricsSetters(); - setLastRunMetricsGapRange({ - gte: '2020-01-01T00:00:00.000Z', - lte: '2020-01-01T01:00:00.000Z', - }); + ruleMonitoringService.addFrameworkMetrics({ total_search_duration_ms: 42 }); - const { - run: { - last_run: { metrics }, - }, - } = ruleMonitoringService.getMonitoring(); - expect(metrics.gap_range?.gte).toEqual('2020-01-01T00:00:00.000Z'); - expect(metrics.gap_range?.lte).toEqual('2020-01-01T01:00:00.000Z'); + expect( + ruleMonitoringService.getMonitoring().run.last_run.metrics.total_search_duration_ms + ).toBe(42); + }); + + it('replaces prior framework total_search_duration_ms', () => { + const ruleMonitoringService = new RuleMonitoringService(); + ruleMonitoringService.addFrameworkMetrics({ total_search_duration_ms: 10 }); + ruleMonitoringService.addFrameworkMetrics({ total_search_duration_ms: 20 }); + + expect( + ruleMonitoringService.getMonitoring().run.last_run.metrics.total_search_duration_ms + ).toBe(20); + }); + + it('preserves executor metrics when addFrameworkMetrics sets total_search_duration_ms', () => { + const ruleMonitoringService = new RuleMonitoringService(); + const { setMetric } = ruleMonitoringService.getSetters(); + setMetric('total_indexing_duration_ms', 99); + ruleMonitoringService.addFrameworkMetrics({ total_search_duration_ms: 42 }); + + const { metrics } = ruleMonitoringService.getMonitoring().run.last_run; + expect(metrics.total_search_duration_ms).toBe(42); + expect(metrics.total_indexing_duration_ms).toBe(99); }); }); }); diff --git a/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.ts b/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.ts index ebe6db019758d..ac9e895527bd1 100644 --- a/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.ts +++ b/x-pack/platform/plugins/shared/alerting/server/monitoring/rule_monitoring_service.ts @@ -5,11 +5,26 @@ * 2.0. */ +import { cloneDeep } from 'lodash'; import { getDefaultMonitoring, getExecutionDurationPercentiles } from '../lib/monitoring'; -import type { RuleMonitoring, RuleMonitoringHistory, PublicRuleMonitoringService } from '../types'; +import type { + RuleMonitoring, + RuleMonitoringHistory, + PublicRuleMonitoringService, + ConsumerExecutionMetrics, +} from '../types'; + +interface FrameworkMetrics { + total_search_duration_ms: number; +} export class RuleMonitoringService { + // Mirrors rule's SO state private monitoring: RuleMonitoring = getDefaultMonitoring(new Date().toISOString()); + // Metrics calculated by the framework + private frameworkMetrics: Partial = {}; + // Rule executor metrics. Essential metrics get written to rule's SO. + private metrics: Partial = {}; public setLastRunMetricsDuration(duration: number) { this.monitoring.run.last_run.metrics.duration = duration; @@ -22,7 +37,26 @@ export class RuleMonitoringService { } public getMonitoring(): RuleMonitoring { - return this.monitoring; + const result = cloneDeep(this.monitoring); + + Object.assign(result.run.last_run.metrics, { + total_search_duration_ms: this.frameworkMetrics.total_search_duration_ms ?? null, + total_indexing_duration_ms: this.metrics.total_indexing_duration_ms ?? null, + gap_duration_s: this.metrics.gap_duration_s ?? null, + gap_range: this.metrics.gap_range ?? null, + }); + + return result; + } + + public getExecutorMetrics(): Partial | undefined { + if (Object.values(this.metrics).some((v) => v != null)) { + return this.metrics; + } + } + + public addFrameworkMetrics(fwkMetrics: Partial): void { + this.frameworkMetrics = fwkMetrics; } public addHistory({ @@ -54,41 +88,16 @@ export class RuleMonitoringService { }; } - public getLastRunMetricsSetters(): PublicRuleMonitoringService { + public getSetters(): PublicRuleMonitoringService { return { - setLastRunMetricsTotalSearchDurationMs: - this.setLastRunMetricsTotalSearchDurationMs.bind(this), - setLastRunMetricsTotalIndexingDurationMs: - this.setLastRunMetricsTotalIndexingDurationMs.bind(this), - setLastRunMetricsTotalAlertsDetected: this.setLastRunMetricsTotalAlertsDetected.bind(this), - setLastRunMetricsTotalAlertsCreated: this.setLastRunMetricsTotalAlertsCreated.bind(this), - setLastRunMetricsGapDurationS: this.setLastRunMetricsGapDurationS.bind(this), - setLastRunMetricsGapRange: this.setLastRunMetricsGapRange.bind(this), + setMetric: this.setMetric.bind(this), + setMetrics: this.setMetrics.bind(this), + clearGapRange: this.clearGapRange.bind(this), }; } - private setLastRunMetricsTotalSearchDurationMs(totalSearchDurationMs: number) { - this.monitoring.run.last_run.metrics.total_search_duration_ms = totalSearchDurationMs; - } - - private setLastRunMetricsTotalIndexingDurationMs(totalIndexingDurationMs: number) { - this.monitoring.run.last_run.metrics.total_indexing_duration_ms = totalIndexingDurationMs; - } - - private setLastRunMetricsTotalAlertsDetected(totalAlertDetected: number) { - this.monitoring.run.last_run.metrics.total_alerts_detected = totalAlertDetected; - } - - private setLastRunMetricsTotalAlertsCreated(totalAlertCreated: number) { - this.monitoring.run.last_run.metrics.total_alerts_created = totalAlertCreated; - } - - private setLastRunMetricsGapDurationS(gapDurationS: number) { - this.monitoring.run.last_run.metrics.gap_duration_s = gapDurationS; - } - - private setLastRunMetricsGapRange(gap: { lte: string; gte: string } | null) { - this.monitoring.run.last_run.metrics.gap_range = gap; + private clearGapRange(): void { + delete this.metrics.gap_range; } private buildExecutionSuccessRatio() { @@ -100,4 +109,17 @@ export class RuleMonitoringService { const { history } = this.monitoring.run; return getExecutionDurationPercentiles(history); }; + + private setMetric = ( + metricName: MetricName, + value: ConsumerExecutionMetrics[MetricName] + ) => { + this.metrics[metricName] = value; + }; + + private setMetrics = (metrics: Partial) => { + for (const [metricName, value] of Object.entries(metrics)) { + this.setMetric(metricName as keyof ConsumerExecutionMetrics, value); + } + }; } diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.test.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.test.ts index 5a3cde0605b6e..0d958321e0c32 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.test.ts @@ -29,6 +29,7 @@ import type { ConcreteTaskInstance } from '@kbn/task-manager-plugin/server'; import { TaskPriority, TaskStatus } from '@kbn/task-manager-plugin/server'; import { usageCountersServiceMock } from '@kbn/usage-collection-plugin/server/usage_counters/usage_counters_service.mock'; import { AdHocTaskRunner } from './ad_hoc_task_runner'; +import { RuleMonitoringService } from '../monitoring/rule_monitoring_service'; import type { TaskRunnerContext } from './types'; import { ApiKeyType } from './types'; import { backfillClientMock } from '../backfill_client/backfill_client.mock'; @@ -415,6 +416,10 @@ describe('Ad Hoc Task Runner', () => { afterAll(() => fakeTimer.restore()); test('successfully executes the task', async () => { + const addFrameworkMetricsSpy = jest.spyOn( + RuleMonitoringService.prototype, + 'addFrameworkMetrics' + ); ruleTypeWithAlerts.executor.mockImplementation( async ({ services: executorServices, @@ -444,6 +449,10 @@ describe('Ad Hoc Task Runner', () => { const runnerResult = await taskRunner.run(); expect(runnerResult).toEqual({ state: {}, runAt: new Date('1970-01-01T00:00:00.000Z') }); + expect(addFrameworkMetricsSpy).toHaveBeenCalledWith({ + total_search_duration_ms: 23423, + }); + addFrameworkMetricsSpy.mockRestore(); await taskRunner.cleanup(); // Verify all the expected calls were made before calling the rule executor @@ -584,6 +593,48 @@ describe('Ad Hoc Task Runner', () => { expect(logger.error).not.toHaveBeenCalled(); }); + test('passes consumer metrics to AlertingEventLogger', async () => { + const consumerMetrics = { + alerts_candidate_count: 100, + total_enrichment_duration_ms: 50, + }; + ruleTypeWithAlerts.executor.mockImplementation( + async ({ + services: executorServices, + }: RuleExecutorOptions< + RuleTypeParams, + RuleTypeState, + AlertInstanceState, + AlertInstanceContext, + string, + RuleAlertData + >) => { + executorServices.ruleMonitoringService?.setMetrics(consumerMetrics); + return { state: {} }; + } + ); + + const taskRunner = new AdHocTaskRunner({ + context: taskRunnerFactoryInitializerParams, + internalSavedObjectsRepository, + taskInstance: mockedTaskInstance, + }); + + await taskRunner.run(); + await taskRunner.cleanup(); + + expect(alertingEventLogger.done).toHaveBeenCalledWith( + expect.objectContaining({ + consumerMetrics, + backfill: { + id: mockedAdHocRunSO.id, + start: schedule1.runAt, + interval: schedule1.interval, + }, + }) + ); + }); + test('should schedule actions for rule with actions', async () => { const mockedAdHocRunSOWithActions = { ...mockedAdHocRunSO, diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.ts index 7a15c68fc4217..5d3ff4631cdcc 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/ad_hoc_task_runner.ts @@ -396,6 +396,10 @@ export class AdHocTaskRunner implements CancellableTask { this.ruleId = rule.id; this.alertingEventLogger.addOrUpdateRuleData({ id: rule.id, + uuid: + ruleType.solution === 'security' && typeof rule.params.ruleId === 'string' + ? rule.params.ruleId + : undefined, type: ruleType, name: rule.name, consumer: rule.consumer, @@ -483,83 +487,100 @@ export class AdHocTaskRunner implements CancellableTask { } = this.taskInstance; const namespace = this.context.spaceIdToNamespace(spaceId); - const { executionStatus: execStatus, executionMetrics: execMetrics } = - await this.timer.runWithTimer(TaskRunnerTimerSpan.ProcessRuleRun, async () => { - const { executionStatus, executionMetrics, outcome } = processRunResults({ - result: this.ruleResult, - runDate: this.runDate, - runRuleResult, - }); + const result = await this.timer.runWithTimer(TaskRunnerTimerSpan.ProcessRuleRun, async () => { + const { executionStatus, executionMetrics, outcome } = processRunResults({ + result: this.ruleResult, + runDate: this.runDate, + runRuleResult, + }); - if (!isOk(runRuleResult)) { - const error = this.stackTraceLog ? this.stackTraceLog.message : runRuleResult.error; - const stack = this.stackTraceLog - ? this.stackTraceLog.stackTrace - : runRuleResult.error.stack; - const message = `Executing ad hoc run with id "${adHocRunParamsId}" has resulted in Error: ${getEsErrorMessage( - error - )} - ${stack ?? ''}`; - const tags = [adHocRunParamsId, 'rule-ad-hoc-run-failed']; - if (this.ruleTypeId.length > 0) { - tags.push(this.ruleTypeId); - } - if (this.ruleId.length > 0) { - tags.push(this.ruleId); - } - this.logger.error(message, { tags, error: { stack_trace: stack } }); + if (!isOk(runRuleResult)) { + const error = this.stackTraceLog ? this.stackTraceLog.message : runRuleResult.error; + const stack = this.stackTraceLog + ? this.stackTraceLog.stackTrace + : runRuleResult.error.stack; + const message = `Executing ad hoc run with id "${adHocRunParamsId}" has resulted in Error: ${getEsErrorMessage( + error + )} - ${stack ?? ''}`; + const tags = [adHocRunParamsId, 'rule-ad-hoc-run-failed']; + if (this.ruleTypeId.length > 0) { + tags.push(this.ruleTypeId); } - - if (apm.currentTransaction) { - apm.currentTransaction.setOutcome(outcome); + if (this.ruleId.length > 0) { + tags.push(this.ruleId); } + this.logger.error(message, { tags, error: { stack_trace: stack } }); + } - // set start and duration based on event log - const { start, duration } = this.alertingEventLogger.getStartAndDuration(); - if (null != start) { - executionStatus.lastExecutionDate = start; - } - if (null != duration) { - executionStatus.lastDuration = nanosToMillis(duration); - } + if (apm.currentTransaction) { + apm.currentTransaction.setOutcome(outcome); + apm.setCustomContext({ + execution_outcome: { + ...this.ruleMonitoring.getExecutorMetrics(), + error: executionStatus.error, + warning: executionStatus.warning, + }, + }); + } - if (this.scheduleToRunIndex > -1) { - let updatedStatus: AdHocRunStatus = adHocRunStatus.COMPLETE; - if (this.cancelled) { - updatedStatus = adHocRunStatus.TIMEOUT; - } else if (outcome === 'failure') { - updatedStatus = adHocRunStatus.ERROR; - } - this.adHocRunSchedule[this.scheduleToRunIndex].status = updatedStatus; - } + // set start and duration based on event log + const { start, duration } = this.alertingEventLogger.getStartAndDuration(); + if (null != start) { + executionStatus.lastExecutionDate = start; + } + if (null != duration) { + executionStatus.lastDuration = nanosToMillis(duration); + } - // If execution failed due to decrypt error, we should stop running the task - // If the user wants to rerun it, they can reschedule - // In the future, we can consider saving the task in an error state when we - // have one or both of the following abilities - // - ability to rerun a failed ad hoc run - // - ability to clean up failed ad hoc runs (either manually or automatically) - this.shouldDeleteTask = - executionStatus.status === 'error' && - (executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.Decrypt || - executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.Read || - executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.License || - executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.Validate); - - await this.updateAdHocRunSavedObjectPostRun(adHocRunParamsId, namespace, { - ...(this.shouldDeleteTask ? { status: adHocRunStatus.ERROR } : {}), - ...(this.scheduleToRunIndex > -1 ? { schedule: this.adHocRunSchedule } : {}), + if (executionMetrics) { + this.ruleMonitoring.addFrameworkMetrics({ + total_search_duration_ms: executionMetrics.totalSearchDurationMs, }); + } - if (startedAt) { - // Capture how long it took for the rule to run after being claimed - this.timer.setDuration(TaskRunnerTimerSpan.TotalRunDuration, startedAt); + if (this.scheduleToRunIndex > -1) { + let updatedStatus: AdHocRunStatus = adHocRunStatus.COMPLETE; + if (this.cancelled) { + updatedStatus = adHocRunStatus.TIMEOUT; + } else if (outcome === 'failure') { + updatedStatus = adHocRunStatus.ERROR; } + this.adHocRunSchedule[this.scheduleToRunIndex].status = updatedStatus; + } - return { executionStatus, executionMetrics }; + // If execution failed due to decrypt error, we should stop running the task + // If the user wants to rerun it, they can reschedule + // In the future, we can consider saving the task in an error state when we + // have one or both of the following abilities + // - ability to rerun a failed ad hoc run + // - ability to clean up failed ad hoc runs (either manually or automatically) + this.shouldDeleteTask = + executionStatus.status === 'error' && + (executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.Decrypt || + executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.Read || + executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.License || + executionStatus?.error?.reason === RuleExecutionStatusErrorReasons.Validate); + + await this.updateAdHocRunSavedObjectPostRun(adHocRunParamsId, namespace, { + ...(this.shouldDeleteTask ? { status: adHocRunStatus.ERROR } : {}), + ...(this.scheduleToRunIndex > -1 ? { schedule: this.adHocRunSchedule } : {}), }); + + if (startedAt) { + // Capture how long it took for the rule to run after being claimed + this.timer.setDuration(TaskRunnerTimerSpan.TotalRunDuration, startedAt); + } + + return { + executionStatus, + executionMetrics, + consumerExecutionMetrics: this.ruleMonitoring.getExecutorMetrics(), + }; + }); this.alertingEventLogger.done({ - status: execStatus, - metrics: execMetrics, + status: result.executionStatus, + metrics: result.executionMetrics, + consumerMetrics: result.consumerExecutionMetrics, // in the future if we have other types of ad hoc runs (like preview) // we can differentiate and pass in different info backfill: { diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/fixtures.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/fixtures.ts index 236e07f215719..19467b49404ff 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/fixtures.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/fixtures.ts @@ -74,6 +74,7 @@ export const generateRuleUpdateParams = ({ successRatio = 1, history = defaultHistory, alertsCount, + metrics, }: { error?: null | { reason: string; message: string }; warning?: null | { reason: string; message: string }; @@ -83,6 +84,7 @@ export const generateRuleUpdateParams = ({ successRatio?: number; history?: RuleMonitoring['run']['history']; alertsCount?: Record; + metrics?: Record; }) => [ { id: `alert:1`, @@ -100,12 +102,12 @@ export const generateRuleUpdateParams = ({ metrics: { duration: 0, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, total_alerts_created: null, total_alerts_detected: null, total_indexing_duration_ms: null, total_search_duration_ms: null, + ...metrics, }, }, }, diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.test.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.test.ts index c3fb6f4f90614..4fbf1374271fb 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.test.ts @@ -76,7 +76,7 @@ describe('getExecutorServices', () => { }; const ruleMonitoringService = ruleMonitoringServiceMock.create(); const ruleResultService = ruleResultServiceMock.create(); - (ruleMonitoringService.getLastRunMetricsSetters as jest.Mock).mockReturnValue({}); + (ruleMonitoringService.getSetters as jest.Mock).mockReturnValue({}); (ruleResultService.getLastRunSetters as jest.Mock).mockReturnValue({}); beforeEach(() => { diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.ts index 18763bfdfc98c..2d3b9ed079cac 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/get_executor_services.ts @@ -87,7 +87,7 @@ export const getExecutorServices = (opts: GetExecutorServicesOpts): ExecutorServ const uiSettingsClient = context.uiSettings.asScopedToClient(savedObjectsClient); return { - ruleMonitoringService: opts.ruleMonitoringService.getLastRunMetricsSetters(), + ruleMonitoringService: opts.ruleMonitoringService.getSetters(), ruleResultService: opts.ruleResultService.getLastRunSetters(), savedObjectsClient, uiSettingsClient, diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.test.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.test.ts index 84d427b0b58ff..cbf00015ed8e8 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.test.ts @@ -339,7 +339,11 @@ describe('Task Runner', () => { testAlertingEventLogCalls({ status: 'ok' }); expect(elasticsearchService.client.asInternalUser.update).toHaveBeenCalledWith( - ...generateRuleUpdateParams({}) + ...generateRuleUpdateParams({ + metrics: { + total_search_duration_ms: 23423, + }, + }) ); expect(taskRunnerFactoryInitializerParams.executionContext.withContext).toBeCalledTimes(1); @@ -358,6 +362,73 @@ describe('Task Runner', () => { ).toHaveBeenCalled(); }); + test('passes total_search_duration_ms from execution metrics into rule monitoring via addFrameworkMetrics', async () => { + const addFrameworkMetricsSpy = jest.spyOn( + RuleMonitoringService.prototype, + 'addFrameworkMetrics' + ); + const taskRunner = new TaskRunner({ + ruleType, + taskInstance: { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + previousStartedAt: new Date(Date.now() - 5 * 60 * 1000).toISOString(), + }, + }, + context: taskRunnerFactoryInitializerParams, + inMemoryMetrics, + internalSavedObjectsRepository, + }); + + mockGetRuleFromRaw.mockReturnValue(mockedRuleTypeSavedObject as Rule); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(mockedRawRuleSO); + + await taskRunner.run(); + + expect(addFrameworkMetricsSpy).toHaveBeenCalledWith({ + total_search_duration_ms: 23423, + }); + addFrameworkMetricsSpy.mockRestore(); + }); + + test('passes consumer metrics to AlertingEventLogger', async () => { + const consumerMetrics = { + alerts_candidate_count: 42, + alerts_suppressed_count: 7, + frozen_indices_queried_count: 3, + }; + ruleType.executor.mockImplementation(async ({ services: { ruleMonitoringService } }) => { + ruleMonitoringService?.setMetrics(consumerMetrics); + return { state: {} }; + }); + + const taskRunner = new TaskRunner({ + ruleType, + taskInstance: { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + previousStartedAt: new Date(Date.now() - 5 * 60 * 1000).toISOString(), + }, + }, + context: taskRunnerFactoryInitializerParams, + inMemoryMetrics, + internalSavedObjectsRepository, + }); + + mockGetRuleFromRaw.mockReturnValue(mockedRuleTypeSavedObject as Rule); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(mockedRawRuleSO); + + await taskRunner.run(); + + expect(alertingEventLogger.done).toHaveBeenCalledWith( + expect.objectContaining({ + consumerMetrics, + }) + ); + }); + test('should update the persisted alerts', async () => { taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); @@ -2756,7 +2827,10 @@ describe('Task Runner', () => { await taskRunner.run(); expect(elasticsearchService.client.asInternalUser.update).toHaveBeenCalledWith( - ...generateRuleUpdateParams({ nextRun: '1970-01-01T00:00:10.000Z' }) + ...generateRuleUpdateParams({ + nextRun: '1970-01-01T00:00:10.000Z', + metrics: { total_search_duration_ms: 23423 }, + }) ); }); @@ -2915,6 +2989,9 @@ describe('Task Runner', () => { outcome: 'warning', warning, alertsCount: { active: 1, new: 1 }, + metrics: { + total_search_duration_ms: null, + }, }) ); @@ -3088,6 +3165,9 @@ describe('Task Runner', () => { outcome: 'warning', warning, alertsCount: { active: 2, new: 2 }, + metrics: { + total_search_duration_ms: null, + }, }) ); @@ -3690,6 +3770,7 @@ describe('Task Runner', () => { triggeredActionsStatus: 'complete', hasReachedQueuedActionsLimit, }, + consumerMetrics: undefined, status: { lastExecutionDate: new Date('1970-01-01T00:00:00.000Z'), status, diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.ts index 79d98bf6a5284..ca6df4fb21100 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner.ts @@ -574,6 +574,11 @@ export class TaskRunner< name: runRuleParams.rule.name, consumer: runRuleParams.rule.consumer, revision: runRuleParams.rule.revision, + uuid: + this.ruleType.solution === 'security' && + typeof runRuleParams.rule.params.ruleId === 'string' + ? runRuleParams.rule.params.ruleId + : undefined, }); // Set rule monitoring data @@ -581,7 +586,7 @@ export class TaskRunner< // Clear gap range that was persisted in the rule SO if (this.ruleMonitoring.getMonitoring()?.run?.last_run?.metrics?.gap_range) { - this.ruleMonitoring.getLastRunMetricsSetters().setLastRunMetricsGapRange(null); + this.ruleMonitoring.getSetters().clearGapRange(); } (async () => { try { @@ -608,89 +613,108 @@ export class TaskRunner< schedule: Result; runRuleResult: Result; }) { - const { executionStatus: execStatus, executionMetrics: execMetrics } = - await this.timer.runWithTimer(TaskRunnerTimerSpan.ProcessRuleRun, async () => { - const { - params: { alertId: ruleId }, - startedAt, - schedule: taskSchedule, - } = this.taskInstance; - - let nextRun: string | null = null; - if (isOk(schedule)) { - nextRun = getNextRun({ startDate: startedAt, interval: schedule.value.interval }); - } else if (taskSchedule) { - // rules cannot use rrule for scheduling yet - nextRun = getNextRun({ startDate: startedAt, interval: taskSchedule.interval }); - } + const result = await this.timer.runWithTimer(TaskRunnerTimerSpan.ProcessRuleRun, async () => { + const { + params: { alertId: ruleId }, + startedAt, + schedule: taskSchedule, + } = this.taskInstance; + + let nextRun: string | null = null; + if (isOk(schedule)) { + nextRun = getNextRun({ startDate: startedAt, interval: schedule.value.interval }); + } else if (taskSchedule) { + // rules cannot use rrule for scheduling yet + nextRun = getNextRun({ startDate: startedAt, interval: taskSchedule.interval }); + } - const { executionStatus, executionMetrics, lastRun, outcome } = processRunResults({ - logger: this.logger, - logPrefix: `${this.ruleType.id}:${ruleId}`, - result: this.ruleResult, - runDate: this.runDate, - runRuleResult, + const { executionStatus, executionMetrics, lastRun, outcome } = processRunResults({ + logger: this.logger, + logPrefix: `${this.ruleType.id}:${ruleId}`, + result: this.ruleResult, + runDate: this.runDate, + runRuleResult, + }); + + if (apm.currentTransaction) { + apm.currentTransaction.setOutcome(outcome); + apm.setCustomContext({ + execution_outcome: { + ...this.ruleMonitoring.getExecutorMetrics(), + error: executionStatus.error, + warning: executionStatus.warning, + }, }); + } - if (apm.currentTransaction) { - apm.currentTransaction.setOutcome(outcome); - } + // set start and duration based on event log + const { start, duration } = this.alertingEventLogger.getStartAndDuration(); + if (null != start) { + executionStatus.lastExecutionDate = start; + } + if (null != duration) { + executionStatus.lastDuration = nanosToMillis(duration); + } - // set start and duration based on event log - const { start, duration } = this.alertingEventLogger.getStartAndDuration(); - if (null != start) { - executionStatus.lastExecutionDate = start; - } - if (null != duration) { - executionStatus.lastDuration = nanosToMillis(duration); - } + // if executionStatus indicates an error, fill in fields in + this.ruleMonitoring.addHistory({ + duration: executionStatus.lastDuration, + hasError: executionStatus.error != null, + runDate: this.runDate, + }); - // if executionStatus indicates an error, fill in fields in - this.ruleMonitoring.addHistory({ - duration: executionStatus.lastDuration, - hasError: executionStatus.error != null, - runDate: this.runDate, + // In case of non-success outcome framework calculated metrics could be incomplete + // Instead of including numbers without clear interpretation omit them + if (executionMetrics && lastRun.outcome === 'succeeded') { + this.ruleMonitoring.addFrameworkMetrics({ + total_search_duration_ms: executionMetrics.totalSearchDurationMs, }); + } - const gap = this.ruleMonitoring.getMonitoring()?.run?.last_run?.metrics?.gap_range; - if (gap) { - this.alertingEventLogger.reportGap({ - gap, - }); - } + const gap = this.ruleMonitoring.getMonitoring()?.run?.last_run?.metrics?.gap_range; + if (gap) { + this.alertingEventLogger.reportGap({ + gap, + }); + } - if (!this.cancelled) { - this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_EXECUTIONS); - if (outcome === 'failure') { - this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_FAILURES); - } - if (this.logger.isLevelEnabled('debug')) { - this.logger.debug( - `Updating rule task for ${this.ruleType.id} rule with id ${ruleId} - ${JSON.stringify( - executionStatus - )} - ${JSON.stringify(lastRun)}` - ); - } - - await this.updateRuleSavedObjectPostRun(ruleId, { - executionStatus: ruleExecutionStatusToRaw(executionStatus), - nextRun, - lastRun: lastRunToRaw(lastRun), - monitoring: this.ruleMonitoring.getMonitoring() as RawRuleMonitoring, - }); + if (!this.cancelled) { + this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_EXECUTIONS); + if (outcome === 'failure') { + this.inMemoryMetrics.increment(IN_MEMORY_METRICS.RULE_FAILURES); } - - if (startedAt) { - // Capture how long it took for the rule to run after being claimed - this.timer.setDuration(TaskRunnerTimerSpan.TotalRunDuration, startedAt); + if (this.logger.isLevelEnabled('debug')) { + this.logger.debug( + `Updating rule task for ${this.ruleType.id} rule with id ${ruleId} - ${JSON.stringify( + executionStatus + )} - ${JSON.stringify(lastRun)}` + ); } - return { executionStatus, executionMetrics }; - }); + await this.updateRuleSavedObjectPostRun(ruleId, { + executionStatus: ruleExecutionStatusToRaw(executionStatus), + nextRun, + lastRun: lastRunToRaw(lastRun), + monitoring: this.ruleMonitoring.getMonitoring() as RawRuleMonitoring, + }); + } + + if (startedAt) { + // Capture how long it took for the rule to run after being claimed + this.timer.setDuration(TaskRunnerTimerSpan.TotalRunDuration, startedAt); + } + + return { + executionStatus, + executionMetrics, + consumerExecutionMetrics: this.ruleMonitoring.getExecutorMetrics(), + }; + }); this.alertingEventLogger.done({ - status: execStatus, - metrics: execMetrics, + status: result.executionStatus, + metrics: result.executionMetrics, + consumerMetrics: result.consumerExecutionMetrics, timings: this.timer.toJson(), }); } diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_alerts_client.test.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_alerts_client.test.ts index 869065d4d9c0a..11ca5f1fdc018 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_alerts_client.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_alerts_client.test.ts @@ -399,7 +399,11 @@ describe('Task Runner', () => { ); expect(elasticsearchService.client.asInternalUser.update).toHaveBeenCalledWith( - ...generateRuleUpdateParams({}) + ...generateRuleUpdateParams({ + metrics: { + total_search_duration_ms: 23423, + }, + }) ); expect(taskRunnerFactoryInitializerParams.executionContext.withContext).toBeCalledTimes(1); @@ -534,7 +538,11 @@ describe('Task Runner', () => { { tags: ['1', 'test'] } ); expect(elasticsearchService.client.asInternalUser.update).toHaveBeenCalledWith( - ...generateRuleUpdateParams({}) + ...generateRuleUpdateParams({ + metrics: { + total_search_duration_ms: 23423, + }, + }) ); expect(taskRunnerFactoryInitializerParams.executionContext.withContext).toBeCalledTimes(1); expect( @@ -736,7 +744,11 @@ describe('Task Runner', () => { }); expect(elasticsearchService.client.asInternalUser.update).toHaveBeenCalledWith( - ...generateRuleUpdateParams({}) + ...generateRuleUpdateParams({ + metrics: { + total_search_duration_ms: 23423, + }, + }) ); expect(taskRunnerFactoryInitializerParams.executionContext.withContext).toBeCalledTimes(1); @@ -827,7 +839,11 @@ describe('Task Runner', () => { }); expect(elasticsearchService.client.asInternalUser.update).toHaveBeenCalledWith( - ...generateRuleUpdateParams({}) + ...generateRuleUpdateParams({ + metrics: { + total_search_duration_ms: 23423, + }, + }) ); expect(taskRunnerFactoryInitializerParams.executionContext.withContext).toBeCalledTimes(1); diff --git a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_cancel.test.ts b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_cancel.test.ts index 7bb44b575e5fd..de8dad6097a2c 100644 --- a/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_cancel.test.ts +++ b/x-pack/platform/plugins/shared/alerting/server/task_runner/task_runner_cancel.test.ts @@ -279,8 +279,7 @@ describe('Task Runner Cancel', () => { metrics: { duration: 0, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, total_alerts_created: null, total_alerts_detected: null, total_indexing_duration_ms: null, diff --git a/x-pack/platform/plugins/shared/alerting/server/types.ts b/x-pack/platform/plugins/shared/alerting/server/types.ts index 2f0dc15baab6d..52f2315168bbb 100644 --- a/x-pack/platform/plugins/shared/alerting/server/types.ts +++ b/x-pack/platform/plugins/shared/alerting/server/types.ts @@ -451,13 +451,23 @@ export type RulesSettingsClientApi = PublicMethodsOf; export type RulesSettingsFlappingClientApi = PublicMethodsOf; export type RulesSettingsQueryDelayClientApi = PublicMethodsOf; -export interface PublicMetricsSetters { - setLastRunMetricsTotalSearchDurationMs: (totalSearchDurationMs: number) => void; - setLastRunMetricsTotalIndexingDurationMs: (totalIndexingDurationMs: number) => void; - setLastRunMetricsTotalAlertsDetected: (totalAlertDetected: number) => void; - setLastRunMetricsTotalAlertsCreated: (totalAlertCreated: number) => void; - setLastRunMetricsGapDurationS: (gapDurationS: number) => void; - setLastRunMetricsGapRange: (gapRange: { lte: string; gte: string } | null) => void; +export interface ConsumerExecutionMetrics { + total_indexing_duration_ms: number; + total_enrichment_duration_ms: number; + gap_duration_s: number; + gap_range: { lte: string; gte: string }; + alerts_candidate_count: number; + alerts_suppressed_count: number; + frozen_indices_queried_count: number; +} + +export interface PublicRuleMonitoringService { + setMetric: ( + metricName: MetricName, + value: ConsumerExecutionMetrics[MetricName] + ) => void; + setMetrics: (metrics: Partial) => void; + clearGapRange: () => void; } export interface PublicLastRunSetters { @@ -466,8 +476,6 @@ export interface PublicLastRunSetters { setLastRunOutcomeMessage: (warning: string) => void; } -export type PublicRuleMonitoringService = PublicMetricsSetters; - export type PublicRuleResultService = PublicLastRunSetters; export type { diff --git a/x-pack/platform/plugins/shared/event_log/generated/mappings.json b/x-pack/platform/plugins/shared/event_log/generated/mappings.json index c6205c9498482..a5977f7bd37d2 100644 --- a/x-pack/platform/plugins/shared/event_log/generated/mappings.json +++ b/x-pack/platform/plugins/shared/event_log/generated/mappings.json @@ -489,6 +489,12 @@ }, "update_alerts_duration_ms": { "type": "long" + }, + "alerts_candidate_count": { + "type": "long" + }, + "alerts_suppressed_count": { + "type": "long" } } } diff --git a/x-pack/platform/plugins/shared/event_log/generated/schemas.ts b/x-pack/platform/plugins/shared/event_log/generated/schemas.ts index 196f810192df6..9b058a7df9104 100644 --- a/x-pack/platform/plugins/shared/event_log/generated/schemas.ts +++ b/x-pack/platform/plugins/shared/event_log/generated/schemas.ts @@ -209,6 +209,8 @@ export const EventSchema = schema.maybe( total_run_duration_ms: ecsStringOrNumber(), total_enrichment_duration_ms: ecsStringOrNumber(), update_alerts_duration_ms: ecsStringOrNumber(), + alerts_candidate_count: ecsStringOrNumber(), + alerts_suppressed_count: ecsStringOrNumber(), }) ), }) diff --git a/x-pack/platform/plugins/shared/event_log/scripts/mappings.js b/x-pack/platform/plugins/shared/event_log/scripts/mappings.js index 7f58de578a667..8a1494edaf3ef 100644 --- a/x-pack/platform/plugins/shared/event_log/scripts/mappings.js +++ b/x-pack/platform/plugins/shared/event_log/scripts/mappings.js @@ -256,6 +256,12 @@ exports.EcsCustomPropertyMappings = { update_alerts_duration_ms: { type: 'long', }, + alerts_candidate_count: { + type: 'long', + }, + alerts_suppressed_count: { + type: 'long', + }, }, }, }, diff --git a/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/plugin.ts b/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/plugin.ts index a53212c3be0d8..c487febd58023 100644 --- a/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/plugin.ts +++ b/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/plugin.ts @@ -59,6 +59,7 @@ export interface FixtureStartDeps { const testRuleTypes = [ 'test.always-firing', 'test.cumulative-firing', + 'test.consumer-metrics', 'test.never-firing', 'test.failing', 'test.authorization', diff --git a/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/rule_types.ts b/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/rule_types.ts index 4abec7c160f63..72183618dbb68 100644 --- a/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/rule_types.ts +++ b/x-pack/platform/test/alerting_api_integration/common/plugins/alerts/server/rule_types.ts @@ -195,6 +195,48 @@ function getCumulativeFiringRuleType() { return result; } +function getConsumerMetricsRuleType() { + const paramsSchema = schema.object({ + index: schema.string(), + reference: schema.string(), + }); + type ParamsType = TypeOf; + const result: RuleType = { + id: 'test.consumer-metrics', + name: 'Test: Consumer metrics', + actionGroups: [{ id: 'default', name: 'Default' }], + category: 'kibana', + producer: 'alertsFixture', + solution: 'stack', + defaultActionGroupId: 'default', + minimumLicenseRequired: 'basic', + isExportable: true, + autoRecoverAlerts: false, + async executor({ services, params }) { + services.ruleMonitoringService?.setMetrics({ + alerts_candidate_count: 90357, + alerts_suppressed_count: 42, + total_indexing_duration_ms: 987, + total_enrichment_duration_ms: 654, + frozen_indices_queried_count: 3, + }); + await services.scopedClusterClient.asCurrentUser.index({ + index: params.index, + refresh: 'wait_for', + document: { + reference: params.reference, + source: 'alert:test.consumer-metrics', + }, + }); + return { state: {} }; + }, + validate: { + params: paramsSchema, + }, + }; + return result; +} + function getNeverFiringRuleType() { const paramsSchema = schema.object({ index: schema.string(), @@ -1750,6 +1792,7 @@ export function defineRuleTypes( alerting.registerType(getAlwaysFiringRuleType()); alerting.registerType(getCumulativeFiringRuleType()); + alerting.registerType(getConsumerMetricsRuleType()); alerting.registerType(getNeverFiringRuleType()); alerting.registerType(getFailingRuleType()); alerting.registerType(getValidationRuleType()); diff --git a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/consumer_metrics_backfill.ts b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/consumer_metrics_backfill.ts new file mode 100644 index 0000000000000..500bb8e94f8c4 --- /dev/null +++ b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/consumer_metrics_backfill.ts @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import moment from 'moment'; +import { ESTestIndexTool, ES_TEST_INDEX_NAME } from '@kbn/alerting-api-integration-helpers'; +import type { IValidatedEvent } from '@kbn/event-log-plugin/server'; +import { getUrlPrefix, ObjectRemover } from '../../../../../common/lib'; +import { waitForEventLogDocs } from './test_utils'; +import { SuperuserAtSpace1 } from '../../../../scenarios'; +import type { FtrProviderContext } from '../../../../../common/ftr_provider_context'; + +const EXPECTED_CONSUMER_METRICS = { + alerts_candidate_count: 90357, + alerts_suppressed_count: 42, + total_indexing_duration_ms: 987, + total_enrichment_duration_ms: 654, + frozen_indices_queried_count: 3, +} as const; + +export default function consumerMetricsBackfillTests({ getService }: FtrProviderContext) { + const es = getService('es'); + const retry = getService('retry'); + const esTestIndexTool = new ESTestIndexTool(es, retry); + const supertestWithoutAuth = getService('supertestWithoutAuth'); + const supertest = getService('supertest'); + const objectRemover = new ObjectRemover(supertest); + + describe('consumer metrics on execute-backfill events', () => { + let backfillId: string | undefined; + + beforeEach(async () => { + backfillId = undefined; + await esTestIndexTool.destroy(); + await esTestIndexTool.setup(); + }); + + afterEach(async () => { + if (backfillId) { + await supertest + .delete( + `${getUrlPrefix( + SuperuserAtSpace1.space.id + )}/internal/alerting/rules/backfill/${backfillId}` + ) + .set('kbn-xsrf', 'foo'); + } + await objectRemover.removeAll(); + await esTestIndexTool.destroy(); + }); + + it('writes consumer metrics from the rule executor to execute-backfill events', async () => { + const spaceId = SuperuserAtSpace1.space.id; + + const response1 = await supertestWithoutAuth + .post(`${getUrlPrefix(spaceId)}/api/alerting/rule`) + .set('kbn-xsrf', 'foo') + .auth(SuperuserAtSpace1.user.username, SuperuserAtSpace1.user.password) + .send({ + enabled: true, + name: 'consumer metrics backfill rule', + tags: [], + rule_type_id: 'test.consumer-metrics', + consumer: 'alertsFixture', + schedule: { interval: '24h' }, + actions: [], + params: { + index: ES_TEST_INDEX_NAME, + reference: 'backfill-ref', + }, + }) + .expect(200); + + const ruleId = response1.body.id; + objectRemover.add(spaceId, ruleId, 'rule', 'alerting'); + + const start = moment().utc().startOf('day').subtract(7, 'days').toISOString(); + const end = moment().utc().startOf('day').subtract(1, 'day').toISOString(); + + const response2 = await supertestWithoutAuth + .post(`${getUrlPrefix(spaceId)}/internal/alerting/rules/backfill/_schedule`) + .set('kbn-xsrf', 'foo') + .auth(SuperuserAtSpace1.user.username, SuperuserAtSpace1.user.password) + .send([{ rule_id: ruleId, ranges: [{ start, end }] }]) + .expect(200); + + backfillId = response2.body[0].id; + + const events: IValidatedEvent[] = await waitForEventLogDocs( + retry, + getService, + backfillId as string, + spaceId, + new Map([['execute-backfill', { gte: 1 }]]) + ); + + const backfillEvents = events.filter((e) => e?.event?.action === 'execute-backfill'); + expect(backfillEvents.length > 0).to.be(true); + + for (const e of backfillEvents) { + const metrics = e?.kibana?.alert?.rule?.execution?.metrics; + expect(metrics?.alerts_candidate_count).to.eql( + EXPECTED_CONSUMER_METRICS.alerts_candidate_count + ); + expect(metrics?.alerts_suppressed_count).to.eql( + EXPECTED_CONSUMER_METRICS.alerts_suppressed_count + ); + expect(metrics?.total_indexing_duration_ms).to.eql( + EXPECTED_CONSUMER_METRICS.total_indexing_duration_ms + ); + expect(metrics?.total_enrichment_duration_ms).to.eql( + EXPECTED_CONSUMER_METRICS.total_enrichment_duration_ms + ); + expect(metrics?.frozen_indices_queried_count).to.eql( + EXPECTED_CONSUMER_METRICS.frozen_indices_queried_count + ); + } + }); + }); +} diff --git a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/index.ts b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/index.ts index d791e7535e753..a5e0838779f59 100644 --- a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/index.ts +++ b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group1/tests/alerting/backfill/index.ts @@ -16,5 +16,6 @@ export default function backfillTests({ loadTestFile }: FtrProviderContext) { loadTestFile(require.resolve('./delete')); loadTestFile(require.resolve('./delete_rule')); loadTestFile(require.resolve('./public_api')); + loadTestFile(require.resolve('./consumer_metrics_backfill')); }); } diff --git a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/clone.ts b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/clone.ts index bf58386562cff..48a1eb76255d8 100644 --- a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/clone.ts +++ b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/clone.ts @@ -169,8 +169,7 @@ export default function createAlertTests({ getService }: FtrProviderContext) { total_alerts_detected: null, total_alerts_created: null, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, }, }, }, diff --git a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/user_managed_api_key.ts b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/user_managed_api_key.ts index cfb08e4dbb80f..5681b01a582b2 100644 --- a/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/user_managed_api_key.ts +++ b/x-pack/platform/test/alerting_api_integration/security_and_spaces/group3/tests/alerting/user_managed_api_key.ts @@ -257,8 +257,7 @@ export default function userManagedApiKeyTest({ getService }: FtrProviderContext total_alerts_detected: null, total_alerts_created: null, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, }, }, }, @@ -342,8 +341,7 @@ export default function userManagedApiKeyTest({ getService }: FtrProviderContext total_alerts_detected: null, total_alerts_created: null, gap_duration_s: null, - // TODO: uncomment after intermidiate release - // gap_range: null, + gap_range: null, }, }, }, diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group2/monitoring.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group2/monitoring.ts index e76dfe16731ca..5b2c2f6322770 100644 --- a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group2/monitoring.ts +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group2/monitoring.ts @@ -6,13 +6,16 @@ */ import expect from '@kbn/expect'; +import { ESTestIndexTool } from '@kbn/alerting-api-integration-helpers'; import { Spaces } from '../../../scenarios'; import { getUrlPrefix, getTestRuleData, ObjectRemover } from '../../../../common/lib'; import type { FtrProviderContext } from '../../../../common/ftr_provider_context'; +import { createEsDocuments } from '../create_test_data'; export default function monitoringAlertTests({ getService }: FtrProviderContext) { const supertest = getService('supertest'); const retry = getService('retry'); + const es = getService('es'); describe('monitoring', () => { const objectRemover = new ObjectRemover(supertest); @@ -144,6 +147,63 @@ export default function monitoringAlertTests({ getService }: FtrProviderContext) expect(history.duration).to.be.a('number'); }); }); + + describe('last run framework metrics', () => { + const esTestIndexTool = new ESTestIndexTool(es, retry); + const RULE_INTERVAL_SECONDS = 6; + const RULE_INTERVALS_TO_WRITE = 5; + const RULE_INTERVAL_MILLIS = RULE_INTERVAL_SECONDS * 1000; + const ES_GROUPS_TO_WRITE = 3; + + afterEach(async () => { + await esTestIndexTool.destroy(); + }); + + it('persists total_search_duration_ms on the rule after successful rule tun', async () => { + await esTestIndexTool.destroy(); + await esTestIndexTool.setup(); + + const endDateMillis = Date.now() + (RULE_INTERVALS_TO_WRITE - 1) * RULE_INTERVAL_MILLIS; + const endDate = new Date(endDateMillis).toISOString(); + + await createEsDocuments( + es, + esTestIndexTool, + endDate, + RULE_INTERVALS_TO_WRITE, + RULE_INTERVAL_MILLIS, + ES_GROUPS_TO_WRITE + ); + + const createResponse = await supertest + .post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`) + .set('kbn-xsrf', 'foo') + .send( + getTestRuleData({ + rule_type_id: 'test.cancellableRule', + schedule: { interval: '1h' }, + params: { + doLongSearch: false, + doLongPostProcessing: false, + }, + }) + ) + .expect(200); + + objectRemover.add(Spaces.space1.id, createResponse.body.id, 'rule', 'alerting'); + + await waitForExecutionCount(1, createResponse.body.id); + + const getResponse = await supertest + .get(`${getUrlPrefix(Spaces.space1.id)}/internal/alerting/rule/${createResponse.body.id}`) + .expect(200); + + const totalSearchMs = + getResponse.body.monitoring.run.last_run.metrics.total_search_duration_ms; + + expect(totalSearchMs).to.be.greaterThan(0); + }); + }); }); const MAX_ATTEMPTS = 25; diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/config.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/config.ts new file mode 100644 index 0000000000000..f83e3c7041656 --- /dev/null +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/config.ts @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { createTestConfig } from '../../../../common/config'; + +export const EmailDomainsAllowed = ['example.org', 'test.com']; + +export default createTestConfig('spaces_only', { + disabledPlugins: ['security'], + license: 'trial', + enableActionsProxy: false, + verificationMode: 'none', + customizeLocalHostSsl: true, + preconfiguredAlertHistoryEsIndex: true, + emailDomainsAllowed: EmailDomainsAllowed, + useDedicatedTaskRunner: true, + testFiles: [require.resolve('.')], + reportName: 'X-Pack Alerting API Integration Tests - Alerting - group5', +}); diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/consumer_metrics.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/consumer_metrics.ts new file mode 100644 index 0000000000000..0dc125b10bd7d --- /dev/null +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/consumer_metrics.ts @@ -0,0 +1,104 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import { ESTestIndexTool, ES_TEST_INDEX_NAME } from '@kbn/alerting-api-integration-helpers'; +import { Spaces } from '../../../scenarios'; +import { + getUrlPrefix, + getTestRuleData, + ObjectRemover, + getEventLog, + resetRulesSettings, +} from '../../../../common/lib'; +import type { FtrProviderContext } from '../../../../common/ftr_provider_context'; + +const EXPECTED_CONSUMER_METRICS = { + alerts_candidate_count: 90357, + alerts_suppressed_count: 42, + total_indexing_duration_ms: 987, + total_enrichment_duration_ms: 654, + frozen_indices_queried_count: 3, +} as const; + +export default function consumerMetricsEventLogTests({ getService }: FtrProviderContext) { + const supertest = getService('supertest'); + const retry = getService('retry'); + const es = getService('es'); + const esTestIndexTool = new ESTestIndexTool(es, retry); + const objectRemover = new ObjectRemover(supertest); + + describe('consumer metrics on rule execute event', () => { + beforeEach(async () => { + await resetRulesSettings(supertest, Spaces.default.id); + await resetRulesSettings(supertest, Spaces.space1.id); + await esTestIndexTool.destroy(); + await esTestIndexTool.setup(); + }); + + afterEach(async () => { + await objectRemover.removeAll(); + }); + + it('writes consumer metrics from the rule executor to the execute event', async () => { + const spaceId = Spaces.default.id; + const response = await supertest + .post(`${getUrlPrefix(spaceId)}/api/alerting/rule`) + .set('kbn-xsrf', 'foo') + .send( + getTestRuleData({ + rule_type_id: 'test.consumer-metrics', + schedule: { interval: '24h' }, + throttle: null, + notify_when: undefined, + params: { + index: ES_TEST_INDEX_NAME, + reference: 'consumer-metrics-ref', + }, + actions: [], + }) + ) + .expect(200); + + const ruleId = response.body.id; + objectRemover.add(spaceId, ruleId, 'rule', 'alerting'); + + const events = await retry.try(async () => { + return await getEventLog({ + getService, + spaceId, + type: 'alert', + id: ruleId, + provider: 'alerting', + actions: new Map([ + ['execute-start', { equal: 1 }], + ['execute', { equal: 1 }], + ]), + }); + }); + + const executeEvent = events.find((e) => e?.event?.action === 'execute'); + expect(executeEvent).to.be.ok(); + const metrics = executeEvent?.kibana?.alert?.rule?.execution?.metrics; + expect(metrics?.alerts_candidate_count).to.eql( + EXPECTED_CONSUMER_METRICS.alerts_candidate_count + ); + expect(metrics?.alerts_suppressed_count).to.eql( + EXPECTED_CONSUMER_METRICS.alerts_suppressed_count + ); + expect(metrics?.total_indexing_duration_ms).to.eql( + EXPECTED_CONSUMER_METRICS.total_indexing_duration_ms + ); + expect(metrics?.total_enrichment_duration_ms).to.eql( + EXPECTED_CONSUMER_METRICS.total_enrichment_duration_ms + ); + expect(metrics?.frozen_indices_queried_count).to.eql( + EXPECTED_CONSUMER_METRICS.frozen_indices_queried_count + ); + }); + }); +} diff --git a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/index.ts b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/index.ts index e97f040191948..03447e8d27713 100644 --- a/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/index.ts +++ b/x-pack/platform/test/alerting_api_integration/spaces_only/tests/alerting/group5/index.ts @@ -12,6 +12,7 @@ export default function alertingTests({ loadTestFile, getService }: FtrProviderC describe('Alerting', () => { before(async () => await buildUp(getService)); after(async () => await tearDown(getService)); + loadTestFile(require.resolve('./consumer_metrics')); // the next file takes ~37 min to run loadTestFile(require.resolve('./event_log')); }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/__mocks__/index.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/__mocks__/index.ts index 309b970e053f0..9e05004d3de28 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/__mocks__/index.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/__mocks__/index.ts @@ -51,7 +51,11 @@ const ruleExecutionLogForExecutorsMock = { warn: jest.fn(), error: jest.fn(), - logStatusChange: jest.fn(), + logMetric: jest.fn(), + logMetrics: jest.fn(), + + closed: jest.fn(), + close: jest.fn(), }), }; diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client.ts index 35c8d94ae289c..43117a0f0d2ae 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client.ts @@ -5,59 +5,66 @@ * 2.0. */ +import { omitBy } from 'lodash'; import agent from 'elastic-apm-node'; import type { Logger } from '@kbn/core/server'; -import { sum } from 'lodash'; -import type { Duration } from 'moment'; - import type { PublicRuleMonitoringService, PublicRuleResultService, } from '@kbn/alerting-plugin/server/types'; import type { RuleExecutionSettings, - RuleExecutionStatus, LogLevel, + RuleExecutionStatus, } from '../../../../../../../common/api/detection_engine/rule_monitoring'; import { consoleLogLevelFromExecutionStatus, - LogLevelSetting, - logLevelToNumber, RuleExecutionStatusEnum, } from '../../../../../../../common/api/detection_engine/rule_monitoring'; - import { assertUnreachable } from '../../../../../../../common/utility_types'; import { withSecuritySpan } from '../../../../../../utils/with_security_span'; import type { ExtMeta } from '../../utils/console_logging'; -import { truncateValue } from '../../utils/normalization'; -import { getCorrelationIds } from './correlation_ids'; - +import { truncateList, truncateValue } from '../../utils/normalization'; import type { IEventLogWriter } from '../event_log/event_log_writer'; +import { + LogLevelEnum, + LogLevelSetting, + logLevelToNumber, +} from '../../../../../../../common/api/detection_engine/rule_monitoring/model'; +import { SECURITY_RULE_STATUS } from '../../../../rule_types/utils/apm_field_names'; import type { + ExecutionResult, IRuleExecutionLogForExecutors, + LogErrorMessageOptions, LogMessageOptions, RuleExecutionContext, - StatusChangeArgs, + RuleExecutionLogMetrics, } from './client_interface'; -import type { RuleExecutionMetrics } from '../../../../../../../common/api/detection_engine/rule_monitoring/model'; -import { LogLevelEnum } from '../../../../../../../common/api/detection_engine/rule_monitoring/model'; -import { SECURITY_RULE_STATUS } from '../../../../rule_types/utils/apm_field_names'; +import { getCorrelationIds } from './correlation_ids'; +import { checkErrorDetails } from '../../../../rule_types/utils/check_error_details'; -export const createRuleExecutionLogClientForExecutors = ( +export function createRuleExecutionLogClientForExecutors( settings: RuleExecutionSettings, eventLog: IEventLogWriter, logger: Logger, context: RuleExecutionContext, ruleMonitoringService: PublicRuleMonitoringService, ruleResultService: PublicRuleResultService -): IRuleExecutionLogForExecutors => { +): IRuleExecutionLogForExecutors { const baseCorrelationIds = getCorrelationIds(context); const baseLogSuffix = baseCorrelationIds.getLogSuffix(); const baseLogMeta = baseCorrelationIds.getLogMeta(); - const { executionId, ruleId, ruleUuid, ruleName, ruleRevision, ruleType, spaceId } = context; - const client: IRuleExecutionLogForExecutors = { + // Buffers the execution related data + const executionResultBuffer: ExecutionResultBuffer = { + errors: [], + warnings: [], + metrics: {}, + closed: false, + }; + + const ruleExecutionLogClient: IRuleExecutionLogForExecutors = { get context() { return context; }, @@ -84,93 +91,119 @@ export const createRuleExecutionLogClientForExecutors = ( }, warn(message: string, options?: LogMessageOptions): void { + executionResultBuffer.warnings.push(message); + writeMessage(message, { eventLogLevel: LogLevelEnum.warn, consoleLogLevel: options?.consoleLogLevel, }); }, - error(message: string, options?: LogMessageOptions): void { + error(message: string, options?: LogErrorMessageOptions): void { + executionResultBuffer.errors.push({ + message, + userError: options?.userError ?? false, + }); + writeMessage(message, { eventLogLevel: LogLevelEnum.error, consoleLogLevel: options?.consoleLogLevel, }); }, - async logStatusChange(args: StatusChangeArgs): Promise { - await withSecuritySpan('IRuleExecutionLogForExecutors.logStatusChange', async () => { - const correlationIds = baseCorrelationIds.withStatus(args.newStatus); + logMetric( + metricName: Metric, + value: NonNullable + ): void { + if (this.closed()) { + return; + } + + executionResultBuffer.metrics[metricName] = value; + + // total_search_duration_ms gets calculated and logged at the Alerting Framework level + if (metricName !== 'total_search_duration_ms') { + ruleMonitoringService.setMetric(metricName, value); + } + }, + + logMetrics(metrics: Partial): void { + if (this.closed()) { + return; + } + + Object.assign(executionResultBuffer.metrics, metrics); + + // total_search_duration_ms gets calculated and logged at the Alerting Framework level + ruleMonitoringService.setMetrics( + omitBy(metrics, (value, key) => value == null || key === 'total_search_duration_ms') + ); + }, + + closed(): boolean { + return executionResultBuffer.closed; + }, + + async close(): Promise { + if (this.closed()) { + throw new Error('The logger has been closed'); + } + + executionResultBuffer.closed = true; + + await withSecuritySpan('IRuleExecutionLogForExecutors.close', async () => { + const executionResult: ExecutionResult = determineExecutionResult(executionResultBuffer); + + const correlationIds = baseCorrelationIds.withStatus(executionResult.status); const logMeta = correlationIds.getLogMeta(); - agent.addLabels({ [SECURITY_RULE_STATUS]: args.newStatus }); + agent.addLabels({ [SECURITY_RULE_STATUS]: executionResult.status }); try { - const normalizedArgs = normalizeStatusChangeArgs(args); + const normalizedExecutionResult: ExecutionResult = { + status: executionResult.status, + message: truncateValue(executionResult.message) ?? '', + userError: executionResult.userError, + }; + + writeStatusChangeToEventLog(normalizedExecutionResult); + writeMetricsToEventLog(executionResultBuffer.metrics); await Promise.all([ - writeStatusChangeToConsole(normalizedArgs, logMeta), - writeStatusChangeToRuleObject(normalizedArgs), - writeStatusChangeToEventLog(normalizedArgs), + writeExecutionResultToConsole(normalizedExecutionResult, logMeta), + writeExecutionResultToRuleObject(normalizedExecutionResult), ]); } catch (e) { - const logMessage = `Error changing rule status to "${args.newStatus}"`; + const logMessage = `Error writing execution result with status "${executionResult.status}"`; writeExceptionToConsole(e, logMessage, logMeta); } }); }, }; - const writeMessage = ( - message: string, - levels: { eventLogLevel: LogLevel; consoleLogLevel?: LogLevel } - ): void => { - writeMessageToConsole(message, levels.consoleLogLevel ?? LogLevelEnum.debug, baseLogMeta); - writeMessageToEventLog(message, levels.eventLogLevel); - }; - - const writeMessageToConsole = (message: string, logLevel: LogLevel, logMeta: ExtMeta): void => { - switch (logLevel) { - case LogLevelEnum.trace: - logger.trace(`${message} ${baseLogSuffix}`, logMeta); - break; - case LogLevelEnum.debug: - logger.debug(`${message} ${baseLogSuffix}`, logMeta); - break; - case LogLevelEnum.info: - logger.info(`${message} ${baseLogSuffix}`, logMeta); - break; - case LogLevelEnum.warn: - logger.warn(`${message} ${baseLogSuffix}`, logMeta); - break; - case LogLevelEnum.error: - logger.error(`${message} ${baseLogSuffix}`, logMeta); - break; - default: - assertUnreachable(logLevel); - } - }; - - const writeMessageToEventLog = (message: string, logLevel: LogLevel): void => { - const { isEnabled, minLevel } = settings.extendedLogging; - - if (!isEnabled || minLevel === LogLevelSetting.off) { - return; - } - if (logLevelToNumber(logLevel) < logLevelToNumber(minLevel)) { - return; + const determineExecutionResult = ({ + errors, + warnings, + }: ExecutionResultBuffer): ExecutionResult => { + let status: RuleExecutionStatus = RuleExecutionStatusEnum.succeeded; + let message = 'Rule execution completed successfully'; + + if (errors.length > 0) { + status = RuleExecutionStatusEnum.failed; + message = truncateList(errors.map((e) => e.message)).join(', '); + } else if (warnings.length > 0) { + status = RuleExecutionStatusEnum['partial failure']; + message = truncateList(warnings).join('\n\n'); } - eventLog.logMessage({ - ruleId, - ruleUuid, - ruleName, - ruleRevision, - ruleType, - spaceId, - executionId, + return { + status, message, - logLevel, - }); + userError: errors.every( + ({ message: errorMessage, userError }) => + userError || checkErrorDetails(new Error(errorMessage)).isUserError + ), + }; }; const writeExceptionToConsole = (e: unknown, message: string, logMeta: ExtMeta): void => { @@ -178,57 +211,44 @@ export const createRuleExecutionLogClientForExecutors = ( writeMessageToConsole(`${message}. Reason: ${logReason}`, LogLevelEnum.error, logMeta); }; - const writeStatusChangeToConsole = (args: NormalizedStatusChangeArgs, logMeta: ExtMeta): void => { - const messageParts: string[] = [`Changing rule status to "${args.newStatus}"`, args.message]; + const writeExecutionResultToConsole = (args: ExecutionResult, logMeta: ExtMeta): void => { + const messageParts: string[] = [`Changing rule status to "${args.status}"`, args.message]; const logMessage = messageParts.filter(Boolean).join('. '); - const logLevel = consoleLogLevelFromExecutionStatus(args.newStatus, args.userError); + const logLevel = consoleLogLevelFromExecutionStatus(args.status, args.userError); + writeMessageToConsole(logMessage, logLevel, logMeta); }; - const writeStatusChangeToRuleObject = async (args: NormalizedStatusChangeArgs): Promise => { - const { newStatus, message, metrics, userError } = args; + const writeExecutionResultToRuleObject = async (args: ExecutionResult): Promise => { + const { status, message, userError } = args; - if (newStatus === RuleExecutionStatusEnum.running) { - return; - } - - const { - total_search_duration_ms: totalSearchDurationMs, - total_indexing_duration_ms: totalIndexingDurationMs, - execution_gap_duration_s: executionGapDurationS, - gap_range: gapRange, - } = metrics ?? {}; - - if (totalSearchDurationMs) { - ruleMonitoringService.setLastRunMetricsTotalSearchDurationMs(totalSearchDurationMs); - } - - if (totalIndexingDurationMs) { - ruleMonitoringService.setLastRunMetricsTotalIndexingDurationMs(totalIndexingDurationMs); - } - - if (executionGapDurationS) { - ruleMonitoringService.setLastRunMetricsGapDurationS(executionGapDurationS); - } - - if (gapRange) { - ruleMonitoringService.setLastRunMetricsGapRange(gapRange); - } - - if (newStatus === RuleExecutionStatusEnum.failed) { + if (status === RuleExecutionStatusEnum.failed) { ruleResultService.addLastRunError(message, userError ?? false); - } else if (newStatus === RuleExecutionStatusEnum['partial failure']) { + } else if (status === RuleExecutionStatusEnum['partial failure']) { ruleResultService.addLastRunWarning(message); } ruleResultService.setLastRunOutcomeMessage(message); }; - const writeStatusChangeToEventLog = (args: NormalizedStatusChangeArgs): void => { - const { newStatus, message, metrics } = args; + const writeMessage = ( + message: string, + levels: { eventLogLevel: LogLevel; consoleLogLevel?: LogLevel } + ): void => { + writeMessageToConsole(message, levels.consoleLogLevel ?? LogLevelEnum.debug, baseLogMeta); + writeMessageToEventLog(message, levels.eventLogLevel); + }; + + /** + * @deprecated To be removed in favor of Alerting Framework's "execute" event + */ + const writeStatusChangeToEventLog = (args: ExecutionResult): void => { + const { status, message } = args; - if (metrics) { - eventLog.logExecutionMetrics({ + eventLog.logStatusChange({ + status, + message, + ruleInfo: { ruleId, ruleUuid, ruleName, @@ -236,68 +256,88 @@ export const createRuleExecutionLogClientForExecutors = ( ruleType, spaceId, executionId, - metrics, - }); - } + }, + }); + }; - eventLog.logStatusChange({ - ruleId, - ruleUuid, - ruleName, - ruleRevision, - ruleType, - spaceId, - executionId, - newStatus, - message, + /** + * @deprecated To be removed in favor of Alerting Framework's "execute" event + */ + const writeMetricsToEventLog = (metrics: RuleExecutionLogMetrics): void => { + eventLog.logExecutionMetrics({ + metrics: { + total_search_duration_ms: metrics.total_search_duration_ms, + total_indexing_duration_ms: metrics.total_indexing_duration_ms, + total_enrichment_duration_ms: metrics.total_enrichment_duration_ms, + frozen_indices_queried_count: metrics.frozen_indices_queried_count, + execution_gap_duration_s: metrics.gap_duration_s, + gap_range: metrics.gap_range, + }, + ruleInfo: { + ruleId, + ruleUuid, + ruleName, + ruleRevision, + ruleType, + spaceId, + executionId, + }, }); }; - return client; -}; + const writeMessageToEventLog = (message: string, logLevel: LogLevel): void => { + const { isEnabled, minLevel } = settings.extendedLogging; -interface NormalizedStatusChangeArgs { - newStatus: RuleExecutionStatus; - message: string; - metrics?: RuleExecutionMetrics; - userError?: boolean; -} + if (!isEnabled || minLevel === LogLevelSetting.off) { + return; + } + if (logLevelToNumber(logLevel) < logLevelToNumber(minLevel)) { + return; + } -const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChangeArgs => { - if (args.newStatus === RuleExecutionStatusEnum.running) { - return { - newStatus: args.newStatus, - message: '', - }; - } - const { newStatus, message, metrics, userError } = args; - - return { - newStatus, - message: truncateValue(message) ?? '', - metrics: metrics - ? { - total_search_duration_ms: normalizeDurations(metrics.searchDurations), - total_indexing_duration_ms: normalizeDurations(metrics.indexingDurations), - total_enrichment_duration_ms: normalizeDurations(metrics.enrichmentDurations), - execution_gap_duration_s: normalizeGap(metrics.executionGap), - gap_range: metrics.gapRange ?? undefined, - frozen_indices_queried_count: metrics.frozenIndicesQueriedCount, - } - : undefined, - userError, + eventLog.logMessage({ + ruleInfo: { + ruleId, + ruleUuid, + ruleName, + ruleRevision, + ruleType, + spaceId, + executionId, + }, + message, + logLevel, + }); }; -}; -const normalizeDurations = (durations?: string[]): number | undefined => { - if (durations == null) { - return undefined; - } + const writeMessageToConsole = (message: string, logLevel: LogLevel, logMeta: ExtMeta): void => { + switch (logLevel) { + case LogLevelEnum.trace: + logger.trace(`${message} ${baseLogSuffix}`, logMeta); + break; + case LogLevelEnum.debug: + logger.debug(`${message} ${baseLogSuffix}`, logMeta); + break; + case LogLevelEnum.info: + logger.info(`${message} ${baseLogSuffix}`, logMeta); + break; + case LogLevelEnum.warn: + logger.warn(`${message} ${baseLogSuffix}`, logMeta); + break; + case LogLevelEnum.error: + logger.error(`${message} ${baseLogSuffix}`, logMeta); + break; + default: + assertUnreachable(logLevel); + } + }; - const sumAsFloat = sum(durations.map(Number)); - return Math.round(sumAsFloat); -}; + return ruleExecutionLogClient; +} -const normalizeGap = (duration?: Duration): number | undefined => { - return duration ? Math.round(duration.asSeconds()) : undefined; -}; +interface ExecutionResultBuffer { + errors: Array<{ message: string; userError: boolean }>; + warnings: string[]; + metrics: RuleExecutionLogMetrics; + closed: boolean; +} diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client_interface.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client_interface.ts index 705528efd6875..2cb5960e436fa 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client_interface.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client_interface.ts @@ -5,9 +5,8 @@ * 2.0. */ -import type { Duration } from 'moment'; +import type { ConsumerExecutionMetrics } from '@kbn/alerting-plugin/server/types'; import type { - LogLevel, RuleExecutionStatus, RuleExecutionStatusEnum, } from '../../../../../../../common/api/detection_engine/rule_monitoring'; @@ -54,17 +53,33 @@ export interface IRuleExecutionLogForExecutors { * Writes an error message to the event log at ERROR level. * Writes to console at DEBUG level by default (override via options.consoleLogLevel). */ - error(message: string, options?: LogMessageOptions): void; + error(message: string, options?: LogErrorMessageOptions): void; /** - * Writes information about new rule statuses and measured execution metrics: - * 1. To .kibana-* index as a custom `siem-detection-engine-rule-execution-info` saved object. - * This SO is used for fast access to last execution info of a large amount of rules. - * 2. To .kibana-event-log-* index in order to track history of rule executions. - * 3. To console logs. - * @param args Information about the status change event. + * Logs a rule execution metric like a number of source events found and a number of generated alerts. + * Metric names are type-checked against RuleExecutionMetrics. */ - logStatusChange(args: StatusChangeArgs): Promise; + logMetric( + metricName: Metric, + value: RuleExecutionLogMetrics[Metric] + ): void; + + /** + * Convenience method to log multiple rule execution metrics like a number of source events + * found and a number of generated alerts. + * Metric names are type-checked against RuleExecutionMetrics. + */ + logMetrics(metrics: Partial): void; + + /** + * Whether the logger is closed + */ + closed(): boolean; + + /** + * Closes the logger and writes the logged data to the event log + */ + close(): Promise; } /** @@ -117,25 +132,31 @@ export interface LogMessageOptions { consoleLogLevel?: LogLevel; } -export interface RunningStatusChangeArgs { - newStatus: RuleExecutionStatusEnum['running']; +export interface LogErrorMessageOptions extends LogMessageOptions { + /** + * Whether this is a user side error. + */ + userError?: boolean; } /** - * Information about the status change event. + * @deprecated To be removed in favor of Alerting Framework's "execute" event + * + * We have to accept total_search_duration_ms in the rule execution logger as + * it's difficult to extract this value from the Alerting Framework + * + * After fully migrating to the AF's execute event RuleExecutionLogMetrics should + * be removed. */ -export interface StatusChangeArgs { - newStatus: RuleExecutionStatus; - message?: string; - metrics?: MetricsArgs; - userError?: boolean; -} +export type RuleExecutionLogMetrics = Partial< + ConsumerExecutionMetrics & { total_search_duration_ms: number } +>; -export interface MetricsArgs { - searchDurations?: string[]; - indexingDurations?: string[]; - enrichmentDurations?: string[]; - executionGap?: Duration; - gapRange?: { gte: string; lte: string }; - frozenIndicesQueriedCount?: number; +/** + * Arguments for logging the final execution result. The status must not be 'running'. + */ +export interface ExecutionResult { + status: Exclude; + message: string; + userError?: boolean; } diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/event_log/event_log_writer.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/event_log/event_log_writer.ts index 8cf947d94f958..2cd0286f393c3 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/event_log/event_log_writer.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_monitoring/logic/rule_execution_log/event_log/event_log_writer.ts @@ -8,17 +8,17 @@ import { SavedObjectsUtils } from '@kbn/core/server'; import type { IEventLogService } from '@kbn/event-log-plugin/server'; import { SAVED_OBJECT_REL_PRIMARY } from '@kbn/event-log-plugin/server'; -import type { LogLevel } from '../../../../../../../common/api/detection_engine/rule_monitoring'; +import type { + LogLevel, + RuleExecutionMetrics, + RuleExecutionStatus, +} from '../../../../../../../common/api/detection_engine/rule_monitoring'; import { - eventLogLevelFromExecutionStatus, logLevelToNumber, ruleExecutionStatusToNumber, } from '../../../../../../../common/api/detection_engine/rule_monitoring'; -import type { - RuleExecutionMetrics, - RuleExecutionStatus, -} from '../../../../../../../common/api/detection_engine/rule_monitoring/model'; import { + eventLogLevelFromExecutionStatus, LogLevelEnum, RuleExecutionEventTypeEnum, } from '../../../../../../../common/api/detection_engine/rule_monitoring/model'; @@ -33,7 +33,7 @@ export interface IEventLogWriter { logExecutionMetrics(args: ExecutionMetricsArgs): void; } -export interface BaseArgs { +export interface RuleInfo { ruleId: string; ruleUuid: string; ruleName: string; @@ -43,18 +43,26 @@ export interface BaseArgs { executionId: string; } -export interface MessageArgs extends BaseArgs { +export interface MessageArgs { logLevel: LogLevel; message: string; + ruleInfo: RuleInfo; } -export interface StatusChangeArgs extends BaseArgs { - newStatus: RuleExecutionStatus; +export interface StatusChangeArgs { + status: RuleExecutionStatus; message?: string; + ruleInfo: RuleInfo; } -export interface ExecutionMetricsArgs extends BaseArgs { +export interface ExecutionMetricsArgs { metrics: RuleExecutionMetrics; + ruleInfo: RuleInfo; +} + +export interface ExecutionResultLogEntry { + timestamp: string; + message: string; } export const createEventLogWriter = (eventLogService: IEventLogService): IEventLogWriter => { @@ -70,10 +78,10 @@ export const createEventLogWriter = (eventLogService: IEventLogService): IEventL '@timestamp': nowISO(), message: args.message, rule: { - id: args.ruleId, - uuid: args.ruleUuid, - name: args.ruleName, - category: args.ruleType, + id: args.ruleInfo.ruleId, + uuid: args.ruleInfo.ruleUuid, + name: args.ruleInfo.ruleName, + category: args.ruleInfo.ruleType, }, event: { kind: 'event', @@ -88,18 +96,18 @@ export const createEventLogWriter = (eventLogService: IEventLogService): IEventL alert: { rule: { execution: { - uuid: args.executionId, + uuid: args.ruleInfo.executionId, }, - revision: args.ruleRevision, + revision: args.ruleInfo.ruleRevision, }, }, - space_ids: [args.spaceId], + space_ids: [args.ruleInfo.spaceId], saved_objects: [ { rel: SAVED_OBJECT_REL_PRIMARY, type: RULE_SAVED_OBJECT_TYPE, - id: args.ruleId, - namespace: spaceIdToNamespace(args.spaceId), + id: args.ruleInfo.ruleId, + namespace: spaceIdToNamespace(args.ruleInfo.spaceId), }, ], }, @@ -107,15 +115,15 @@ export const createEventLogWriter = (eventLogService: IEventLogService): IEventL }, logStatusChange: (args: StatusChangeArgs): void => { - const logLevel = eventLogLevelFromExecutionStatus(args.newStatus); + const logLevel = eventLogLevelFromExecutionStatus(args.status); eventLogger.logEvent({ '@timestamp': nowISO(), message: args.message, rule: { - id: args.ruleId, - uuid: args.ruleUuid, - name: args.ruleName, - category: args.ruleType, + id: args.ruleInfo.ruleId, + uuid: args.ruleInfo.ruleUuid, + name: args.ruleInfo.ruleName, + category: args.ruleInfo.ruleType, }, event: { kind: 'event', @@ -130,20 +138,20 @@ export const createEventLogWriter = (eventLogService: IEventLogService): IEventL alert: { rule: { execution: { - uuid: args.executionId, - status: args.newStatus, - status_order: ruleExecutionStatusToNumber(args.newStatus), + uuid: args.ruleInfo.executionId, + status: args.status, + status_order: ruleExecutionStatusToNumber(args.status), }, - revision: args.ruleRevision, + revision: args.ruleInfo.ruleRevision, }, }, - space_ids: [args.spaceId], + space_ids: [args.ruleInfo.spaceId], saved_objects: [ { rel: SAVED_OBJECT_REL_PRIMARY, type: RULE_SAVED_OBJECT_TYPE, - id: args.ruleId, - namespace: spaceIdToNamespace(args.spaceId), + id: args.ruleInfo.ruleId, + namespace: spaceIdToNamespace(args.ruleInfo.spaceId), }, ], }, @@ -155,10 +163,10 @@ export const createEventLogWriter = (eventLogService: IEventLogService): IEventL eventLogger.logEvent({ '@timestamp': nowISO(), rule: { - id: args.ruleId, - uuid: args.ruleUuid, - name: args.ruleName, - category: args.ruleType, + id: args.ruleInfo.ruleId, + uuid: args.ruleInfo.ruleUuid, + name: args.ruleInfo.ruleName, + category: args.ruleInfo.ruleType, }, event: { kind: 'metric', @@ -173,19 +181,19 @@ export const createEventLogWriter = (eventLogService: IEventLogService): IEventL alert: { rule: { execution: { - uuid: args.executionId, + uuid: args.ruleInfo.executionId, metrics: args.metrics, }, - revision: args.ruleRevision, + revision: args.ruleInfo.ruleRevision, }, }, - space_ids: [args.spaceId], + space_ids: [args.ruleInfo.spaceId], saved_objects: [ { rel: SAVED_OBJECT_REL_PRIMARY, type: RULE_SAVED_OBJECT_TYPE, - id: args.ruleId, - namespace: spaceIdToNamespace(args.spaceId), + id: args.ruleInfo.ruleId, + namespace: spaceIdToNamespace(args.ruleInfo.spaceId), }, ], }, diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/preview_rule_execution_logger.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/preview_rule_execution_logger.ts index d2a4c0aba9e0b..9f3f51d8a7c3f 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/preview_rule_execution_logger.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/preview_rule_execution_logger.ts @@ -5,19 +5,20 @@ * 2.0. */ -import type { - IRuleMonitoringService, - RuleExecutionContext, - StatusChangeArgs, -} from '../../../rule_monitoring'; +import type { ExecutionResult, IRuleMonitoringService } from '../../../rule_monitoring'; export interface IPreviewRuleExecutionLogger { factory: IRuleMonitoringService['createRuleExecutionLogClientForExecutors']; + getExecutionResult: () => ExecutionResult | undefined; + getErrors: () => string[]; + getWarnings: () => string[]; } -export const createPreviewRuleExecutionLogger = ( - loggedStatusChanges: Array -): IPreviewRuleExecutionLogger => { +export const createPreviewRuleExecutionLogger = (): IPreviewRuleExecutionLogger => { + const errors: string[] = []; + const warnings: string[] = []; + let executionResult: ExecutionResult | undefined; + return { factory: ({ context }) => { const spyLogger = { @@ -26,16 +27,28 @@ export const createPreviewRuleExecutionLogger = ( trace: () => {}, debug: () => {}, info: () => {}, - warn: () => {}, - error: () => {}, - - logStatusChange: (args: StatusChangeArgs): Promise => { - loggedStatusChanges.push({ ...context, ...args }); - return Promise.resolve(); + warn: (message: string) => { + warnings.push(message); + }, + error: (message: string) => { + errors.push(message); }, + logMetric: () => {}, + logMetrics: () => {}, + logExecutionResult: (result: ExecutionResult): void => { + executionResult = result; + }, + + closed: () => false, + close: () => Promise.resolve(), }; return Promise.resolve(spyLogger); }, + getExecutionResult: () => { + return executionResult; + }, + getErrors: () => errors, + getWarnings: () => warnings, }; }; diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/route.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/route.ts index 18265de3cc2d4..43f31ecdb3ead 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/route.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_preview/api/preview_rules/route.ts @@ -47,8 +47,6 @@ import { throwAuthzError } from '../../../../machine_learning/validation'; import { routeLimitedConcurrencyTag } from '../../../../../utils/route_limited_concurrency_tag'; import type { SecuritySolutionPluginRouter } from '../../../../../types'; -import type { RuleExecutionContext, StatusChangeArgs } from '../../../rule_monitoring'; - import type { ConfigType } from '../../../../../config'; import { alertInstanceFactoryStub } from './alert_instance_factory_stub'; import type { @@ -155,8 +153,7 @@ export const previewRulesRoute = ( const spaceId = siemClient.getSpaceId(); const previewId = uuidv4(); const username = security?.authc.getCurrentUser(request)?.username; - const loggedStatusChanges: Array = []; - const previewRuleExecutionLogger = createPreviewRuleExecutionLogger(loggedStatusChanges); + const previewRuleExecutionLogger = createPreviewRuleExecutionLogger(); const runState: Record = { isLoggedRequestsEnabled: request.query.enable_logged_requests, }; @@ -319,25 +316,23 @@ export const previewRulesRoute = ( ruleExecutionTimeout: `${PREVIEW_TIMEOUT_SECONDS}s`, })) as { state: TState; loggedRequests: RulePreviewLoggedRequest[] }); - const errors = loggedStatusChanges - .filter((item) => item.newStatus === RuleExecutionStatusEnum.failed) - .map((item) => item.message ?? 'Unknown Error'); - - const warnings = loggedStatusChanges - .filter((item) => item.newStatus === RuleExecutionStatusEnum['partial failure']) - .map((item) => item.message ?? 'Unknown Warning'); + const executionResult = previewRuleExecutionLogger.getExecutionResult(); logs.push({ - errors, - warnings, + errors: + executionResult?.status === RuleExecutionStatusEnum.failed + ? [executionResult?.message, ...previewRuleExecutionLogger.getErrors()] + : previewRuleExecutionLogger.getErrors(), + warnings: + executionResult?.status === RuleExecutionStatusEnum['partial failure'] + ? [executionResult?.message, ...previewRuleExecutionLogger.getWarnings()] + : previewRuleExecutionLogger.getWarnings(), startedAt: startedAt.toDate().toISOString(), duration: moment().diff(invocationStartTime, 'milliseconds'), ...(loggedRequests ? { requests: loggedRequests } : {}), }); - loggedStatusChanges.length = 0; - - if (errors.length) { + if (executionResult?.status === RuleExecutionStatusEnum.failed) { break; } diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/create_security_rule_type_wrapper.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/create_security_rule_type_wrapper.ts index 9c403fea1dec5..fe122c64c34d6 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/create_security_rule_type_wrapper.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/create_security_rule_type_wrapper.ts @@ -5,7 +5,7 @@ * 2.0. */ -import { isEmpty, partition } from 'lodash'; +import { partition, sum } from 'lodash'; import agent from 'elastic-apm-node'; import type { estypes } from '@elastic/elasticsearch'; @@ -34,8 +34,6 @@ import { getNotificationResultsLink } from '../rule_actions_legacy'; // eslint-disable-next-line no-restricted-imports import { formatAlertForNotificationActions } from '../rule_actions_legacy/logic/notifications/schedule_notification_actions'; import { createResultObject } from './utils'; -import { RuleExecutionStatusEnum } from '../../../../common/api/detection_engine/rule_monitoring'; -import { truncateList } from '../rule_monitoring'; import aadFieldConversion from '../routes/index/signal_aad_mapping.json'; import { extractReferences, injectReferences } from './saved_object_references'; import { withSecuritySpan } from '../../../utils/with_security_span'; @@ -60,7 +58,6 @@ import { SECURITY_RULE_ID, SECURITY_TO, } from './utils/apm_field_names'; -import { checkErrorDetails } from './utils/check_error_details'; const aliasesFieldMap: FieldMap = {}; Object.entries(aadFieldConversion).forEach(([key, value]) => { @@ -207,15 +204,8 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = ruleExecutionLogger.debug(`Starting execution with interval: ${interval}`); - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.running, - }); - let result = createResultObject(state); - const wrapperWarnings = []; - const wrapperErrors = []; - const primaryTimestamp = timestampOverride ?? TIMESTAMP; const secondaryTimestamp = primaryTimestamp !== TIMESTAMP && !timestampOverrideFallbackDisabled @@ -264,16 +254,11 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = runtimeMappings = dataViewRuntimeMappings; } catch (exc) { if (SavedObjectsErrorHelpers.isNotFoundError(exc)) { - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.failed, - message: `Data view is not found.\nError: ${exc}`, + ruleExecutionLogger.error(`Data view is not found.\nError: ${exc}`, { userError: true, }); } else { - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.failed, - message: `Check for indices to search failed.\nError: ${exc}`, - }); + ruleExecutionLogger.error(`Check for indices to search failed.\nError: ${exc}`); } return { state: result.state }; @@ -297,7 +282,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = isServerless: isServerless ?? false, }); - wrapperWarnings.push(...warnings); + warnings.forEach((warningMessage) => ruleExecutionLogger.warn(warningMessage)); const { tuples, @@ -317,7 +302,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = alerting, }); if (rangeTuplesWarningMessage != null) { - wrapperWarnings.push(rangeTuplesWarningMessage); + ruleExecutionLogger.warn(rangeTuplesWarningMessage); } agent.setCustomContext({ [SECURITY_NUM_RANGE_TUPLES]: tuples.length }); @@ -335,15 +320,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = ruleParams: params, }); } - wrapperErrors.push(gapErrorMessage); - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.failed, - message: gapErrorMessage, - metrics: { - executionGap: remainingGap, - gapRange: experimentalFeatures.storeGapsInEventLogEnabled ? gap : undefined, - }, - }); + ruleExecutionLogger.error(gapErrorMessage); } try { @@ -428,6 +405,14 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = }, }); + // Log rule run result errors and warnings explicitly here. + // This may lead to the same errors or warnings logged twice. + // Duplicates issue will be address in https://github.com/elastic/kibana/issues/259389. + runResult.errors.forEach((message) => + ruleExecutionLogger.error(message, { userError: runResult.userError }) + ); + runResult.warningMessages.forEach((message) => ruleExecutionLogger.warn(message)); + const createdSignals = result.createdSignals.concat(runResult.createdSignals); const warningMessages = result.warningMessages.concat(runResult.warningMessages); result = { @@ -468,28 +453,29 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = const disabledActions = rule.actions.filter( (action) => !actions.isActionTypeEnabled(action.actionTypeId) ); - - if (result.totalEventsFound != null) { - ruleExecutionLogger.info(`Found matching events: ${result.totalEventsFound}`); - } const suppressedAlertsCount = result.suppressedAlertsCount ?? 0; - if (suppressedAlertsCount > 0) { - ruleExecutionLogger.info(`Alerts suppressed: ${suppressedAlertsCount}`); - } - - const createdSignalsCount = result.createdSignals.length; - if (result.totalEventsFound != null && result.totalEventsFound > 0) { - const unaccountedEvents = - result.totalEventsFound - createdSignalsCount - suppressedAlertsCount; - if (unaccountedEvents > 0) { - ruleExecutionLogger.info( - `Events that did not result in alerts: ${unaccountedEvents}\nThis is typically because alerts for these events already exist from a previous rule execution, or events were excluded by value list exceptions. This number doesn't include suppressed alerts.` - ); - } - } + ruleExecutionLogger.logMetrics({ + total_search_duration_ms: + result.searchAfterTimes.length > 0 + ? Math.round(sum(result.searchAfterTimes.map(Number))) + : undefined, + total_indexing_duration_ms: + result.bulkCreateTimes.length > 0 + ? Math.round(sum(result.bulkCreateTimes.map(Number))) + : undefined, + total_enrichment_duration_ms: + result.enrichmentTimes.length > 0 + ? Math.round(sum(result.enrichmentTimes.map(Number))) + : undefined, + frozen_indices_queried_count: frozenIndicesQueriedCount, + alerts_suppressed_count: suppressedAlertsCount, + gap_duration_s: + gap && remainingGap ? Math.round(remainingGap.asSeconds()) : undefined, + gap_range: gap, + }); - ruleExecutionLogger.info(`Alerts created: ${createdSignalsCount}`); + const createdSignalsCount = result.createdSignals.length; agent.setCustomContext({ [SECURITY_NUM_ALERTS_CREATED]: createdSignalsCount }); @@ -498,76 +484,12 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper = alertsCreated: createdSignalsCount > 0, disabledActions, }); - wrapperWarnings.push(disabledActionsWarning); - } - - if (result.warningMessages.length > 0 || wrapperWarnings.length > 0) { - // write warning messages first because if we have still have an error to write - // we want to write the error messages last, so that the errors are set - // as the current status of the rule. - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: truncateList(result.warningMessages.concat(wrapperWarnings)).join('\n\n'), - metrics: { - searchDurations: result.searchAfterTimes, - indexingDurations: result.bulkCreateTimes, - enrichmentDurations: result.enrichmentTimes, - frozenIndicesQueriedCount, - }, - }); - } - if (wrapperErrors.length > 0 || result.errors.length > 0) { - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.failed, - message: truncateList(result.errors.concat(wrapperErrors)).join(', '), - metrics: { - searchDurations: result.searchAfterTimes, - indexingDurations: result.bulkCreateTimes, - enrichmentDurations: result.enrichmentTimes, - executionGap: remainingGap, - gapRange: experimentalFeatures.storeGapsInEventLogEnabled ? gap : undefined, - frozenIndicesQueriedCount, - }, - userError: - result.userError || - result.errors.every((err) => checkErrorDetails(err).isUserError), - }); - } else if (!(result.warningMessages.length > 0) && !(wrapperWarnings.length > 0)) { - ruleExecutionLogger.debug('Security Rule execution completed'); - ruleExecutionLogger.debug( - `Indexed ${createdSignalsCount} alerts into "${ruleDataClient.indexNameWithNamespace( - spaceId - )}".${ - !isEmpty(tuples) - ? ` Searched between date ranges: ${JSON.stringify(tuples, null, 2)}.` - : '' - }` - ); - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.succeeded, - message: 'Rule execution completed successfully', - metrics: { - searchDurations: result.searchAfterTimes, - indexingDurations: result.bulkCreateTimes, - enrichmentDurations: result.enrichmentTimes, - frozenIndicesQueriedCount, - }, - }); + ruleExecutionLogger.warn(disabledActionsWarning); } } catch (error) { - const errorMessage = error.message ?? '(no error message given)'; - - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum.failed, - message: `An error occurred during rule execution. ${errorMessage}`, - userError: checkErrorDetails(errorMessage).isUserError, - metrics: { - searchDurations: result.searchAfterTimes, - indexingDurations: result.bulkCreateTimes, - enrichmentDurations: result.enrichmentTimes, - frozenIndicesQueriedCount, - }, - }); + ruleExecutionLogger.error(error.message ?? '(no error message given)'); + } finally { + await ruleExecutionLogger.close(); } if (!isPreview && analytics) { diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/query/create_query_alert_type.test.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/query/create_query_alert_type.test.ts index f3798a2a7f796..2beefd3fc3d0a 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/query/create_query_alert_type.test.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/query/create_query_alert_type.test.ts @@ -19,7 +19,6 @@ import { QUERY_RULE_TYPE_ID } from '@kbn/securitysolution-rules'; import { docLinksServiceMock } from '@kbn/core/server/mocks'; import { IndexPatternsFetcher } from '@kbn/data-views-plugin/server'; import { hasTimestampFields } from '../utils/utils'; -import { RuleExecutionStatusEnum } from '../../../../../common/api/detection_engine'; jest.mock('@kbn/data-views-plugin/server', () => ({ ...jest.requireActual('@kbn/data-views-plugin/server'), @@ -161,13 +160,10 @@ describe('Custom Query Alerts', () => { expect((await ruleDataClient.getWriter()).bulk).not.toHaveBeenCalled(); expect(eventsTelemetry.sendAsync).not.toHaveBeenCalled(); - expect(mockedStatusLogger.logStatusChange).toHaveBeenCalledWith( - expect.objectContaining({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: expect.stringContaining( - 'Unable to find matching indices for rule ALERT_RULE_NAME. This warning will persist until one of the following occurs: a matching index is created or the rule is disabled.' - ), - }) + expect(mockedStatusLogger.warn).toHaveBeenCalledWith( + expect.stringContaining( + 'Unable to find matching indices for rule ALERT_RULE_NAME. This warning will persist until one of the following occurs: a matching index is created or the rule is disabled.' + ) ); }); @@ -252,16 +248,11 @@ describe('Custom Query Alerts', () => { expect((await ruleDataClient.getWriter()).bulk).toHaveBeenCalled(); expect(eventsTelemetry.sendAsync).toHaveBeenCalled(); - // ensures that the last status written is a warning status - // and that status contains the error message - expect(mockedStatusLogger.logStatusChange).lastCalledWith( - expect.objectContaining({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: - 'Timestamp fields check failed to execute Error: hastTimestampFields test error\n' + - '\n' + - "The rule's max alerts per run setting (10000) is greater than the Kibana alerting limit (1000). The rule will only write a maximum of 1000 alerts per rule run.", - }) + expect(mockedStatusLogger.warn).toHaveBeenCalledWith( + 'Timestamp fields check failed to execute Error: hastTimestampFields test error' + ); + expect(mockedStatusLogger.warn).toHaveBeenCalledWith( + "The rule's max alerts per run setting (10000) is greater than the Kibana alerting limit (1000). The rule will only write a maximum of 1000 alerts per rule run." ); }); }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.test.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.test.ts index c8b7f99d26f52..84b75e61f6c20 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.test.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.test.ts @@ -16,7 +16,6 @@ import type { SanitizedRuleAction } from '@kbn/alerting-plugin/common'; import { alertsMock } from '@kbn/alerting-plugin/server/mocks'; import { listMock } from '@kbn/lists-plugin/server/mocks'; import type { ExceptionListClient } from '@kbn/lists-plugin/server'; -import { RuleExecutionStatusEnum } from '../../../../../common/api/detection_engine/rule_monitoring'; import { getListArrayMock } from '../../../../../common/detection_engine/schemas/types/lists.mock'; import { getExceptionListItemSchemaMock } from '@kbn/lists-plugin/common/schemas/response/exception_list_item_schema.mock'; @@ -503,7 +502,7 @@ describe('utils', () => { }, }; - const { foundNoIndices } = await hasTimestampFields({ + const { foundNoIndices, warningMessage } = await hasTimestampFields({ timestampField, timestampFieldCapsResponse: timestampFieldCapsResponse as TransportResult< FieldCapsResponse, @@ -513,11 +512,9 @@ describe('utils', () => { }); expect(foundNoIndices).toBeFalsy(); - expect(ruleExecutionLogger.logStatusChange).toHaveBeenCalledWith({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: - 'The following indices are missing the timestamp override field "event.ingested": ["myfakeindex-1","myfakeindex-2"]', - }); + expect(warningMessage).toBe( + 'The following indices are missing the timestamp override field "event.ingested": ["myfakeindex-1","myfakeindex-2"]' + ); }); test('returns true when missing timestamp field', async () => { @@ -544,7 +541,7 @@ describe('utils', () => { }, }; - const { foundNoIndices } = await hasTimestampFields({ + const { foundNoIndices, warningMessage } = await hasTimestampFields({ timestampField, timestampFieldCapsResponse: timestampFieldCapsResponse as TransportResult< FieldCapsResponse, @@ -554,11 +551,9 @@ describe('utils', () => { }); expect(foundNoIndices).toBeFalsy(); - expect(ruleExecutionLogger.logStatusChange).toHaveBeenCalledWith({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: - 'The following indices are missing the timestamp field "@timestamp": ["myfakeindex-1","myfakeindex-2"]', - }); + expect(warningMessage).toBe( + 'The following indices are missing the timestamp field "@timestamp": ["myfakeindex-1","myfakeindex-2"]' + ); }); }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.ts index 265bc2c1e12bb..bd9c22bc6eeb9 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/utils.ts @@ -39,7 +39,6 @@ import type { SanitizedRuleAction } from '@kbn/alerting-plugin/common'; import type { SuppressionFieldsLatest } from '@kbn/rule-registry-plugin/common/schemas'; import type { TimestampOverride } from '../../../../../common/api/detection_engine/model/rule_schema'; import type { Privilege } from '../../../../../common/api/detection_engine'; -import { RuleExecutionStatusEnum } from '../../../../../common/api/detection_engine/rule_monitoring'; import type { SearchAfterAndBulkCreateReturnType, SignalSearchResponse, @@ -110,10 +109,7 @@ export const hasTimestampFields = async (args: { : timestampFieldCapsResponse.body.fields[timestampField]?.unmapped?.indices )}`; - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: errorString, - }); + ruleExecutionLogger.warn(errorString); return { foundNoIndices: false, warningMessage: errorString }; } @@ -347,10 +343,7 @@ export const getRuleRangeTuples = async ({ if (maxSignals > maxAlertsAllowed) { maxSignalsToUse = maxAlertsAllowed; warningStatusMessage = `The rule's max alerts per run setting (${maxSignals}) is greater than the Kibana alerting limit (${maxAlertsAllowed}). The rule will only write a maximum of ${maxAlertsAllowed} alerts per rule run.`; - await ruleExecutionLogger.logStatusChange({ - newStatus: RuleExecutionStatusEnum['partial failure'], - message: warningStatusMessage, - }); + ruleExecutionLogger.warn(warningStatusMessage); } const tuples = [