From fb27ec864f625cb71c9188498c79c867fb7dbc54 Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Tue, 27 Jul 2021 16:53:13 -0500 Subject: [PATCH 1/8] Don't skip last bucket for most aggs --- .../server/lib/alerting/metric_threshold/lib/metric_query.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index a9fefd57d01e6..2e0b34e2686e6 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -57,7 +57,7 @@ export const getElasticsearchMetricQuery = ( }; const baseAggs = - aggType === Aggregators.RATE + [Aggregators.AVERAGE, Aggregators.P95, Aggregators.P99].indexOf(aggType) > -1 ? { aggregatedIntervals: { date_histogram: { From d64620428575242ee5f4c43d24d83bb0bd62d26f Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Tue, 27 Jul 2021 21:48:14 -0500 Subject: [PATCH 2/8] Allow alerting on partial buckets for certain aggs --- .../metric_threshold/lib/evaluate_alert.ts | 37 ++++++++++++------- .../metric_threshold/lib/metric_query.ts | 7 +++- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts index aeeb705f9b25a..a2e39eb11fb1e 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts @@ -6,6 +6,7 @@ */ import { mapValues, first, last, isNaN } from 'lodash'; +import moment from 'moment'; import { ElasticsearchClient } from 'kibana/server'; import { isTooManyBucketsPreviewException, @@ -121,7 +122,10 @@ const getMetric: ( const intervalAsSeconds = getIntervalInSeconds(interval); const intervalAsMS = intervalAsSeconds * 1000; - const to = roundTimestamp(timeframe ? timeframe.end : Date.now(), timeUnit); + const to = moment(timeframe ? timeframe.end : Date.now()) + .add(1, timeUnit) + .startOf(timeUnit) + .valueOf(); // We need enough data for 5 buckets worth of data. We also need // to convert the intervalAsSeconds to milliseconds. const minimumFrom = to - intervalAsMS * MINIMUM_BUCKETS; @@ -223,13 +227,12 @@ const getValuesFromAggregations = ( try { const { buckets } = aggregations.aggregatedIntervals; if (!buckets.length) return null; // No Data state + if (aggType === Aggregators.COUNT) { - return buckets - .map((bucket) => ({ - key: bucket.from_as_string, - value: bucket.doc_count, - })) - .filter(dropPartialBuckets(dropPartialBucketsOptions)); + return buckets.map((bucket) => ({ + key: bucket.from_as_string, + value: bucket.doc_count, + })); } if (aggType === Aggregators.P95 || aggType === Aggregators.P99) { return buckets @@ -241,12 +244,20 @@ const getValuesFromAggregations = ( }) .filter(dropPartialBuckets(dropPartialBucketsOptions)); } - return buckets - .map((bucket) => ({ - key: bucket.key_as_string ?? bucket.from_as_string, - value: bucket.aggregatedValue?.value ?? null, - })) - .filter(dropPartialBuckets(dropPartialBucketsOptions)); + + if (aggType === Aggregators.AVERAGE) { + buckets + .map((bucket) => ({ + key: bucket.key_as_string ?? bucket.from_as_string, + value: bucket.aggregatedValue?.value ?? null, + })) + .filter(dropPartialBuckets(dropPartialBucketsOptions)); + } + + return buckets.map((bucket) => ({ + key: bucket.key_as_string ?? bucket.from_as_string, + value: bucket.aggregatedValue?.value ?? null, + })); } catch (e) { return NaN; // Error state } diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 2e0b34e2686e6..834697148abaf 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -39,7 +39,10 @@ export const getElasticsearchMetricQuery = ( const to = timeframe.end; const from = timeframe.start; const offset = calculateDateHistogramOffset({ from, to, interval, field: timefield }); - const offsetInMS = parseInt(offset, 10); + const offsetInMS = + [Aggregators.P95, Aggregators.P99, Aggregators.AVERAGE].indexOf(aggType) > -1 + ? parseInt(offset, 10) + : 0; const aggregations = aggType === Aggregators.COUNT @@ -57,7 +60,7 @@ export const getElasticsearchMetricQuery = ( }; const baseAggs = - [Aggregators.AVERAGE, Aggregators.P95, Aggregators.P99].indexOf(aggType) > -1 + aggType === Aggregators.RATE ? { aggregatedIntervals: { date_histogram: { From 5616385d6d2c4420964991c4d2d663b213233791 Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Wed, 28 Jul 2021 07:05:40 -0500 Subject: [PATCH 3/8] Fix test, PR feedback, and some comments --- .../metric_threshold/lib/evaluate_alert.ts | 34 +++++++++++-------- .../metric_threshold/lib/metric_query.ts | 16 ++++++--- .../metric_threshold_executor.test.ts | 4 +-- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts index a2e39eb11fb1e..29728859d585a 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts @@ -235,23 +235,29 @@ const getValuesFromAggregations = ( })); } if (aggType === Aggregators.P95 || aggType === Aggregators.P99) { - return buckets - .map((bucket) => { - const values = bucket.aggregatedValue?.values || []; - const firstValue = first(values); - if (!firstValue) return null; - return { key: bucket.from_as_string, value: firstValue.value }; - }) - .filter(dropPartialBuckets(dropPartialBucketsOptions)); + return ( + buckets + .map((bucket) => { + const values = bucket.aggregatedValue?.values || []; + const firstValue = first(values); + if (!firstValue) return null; + return { key: bucket.from_as_string, value: firstValue.value }; + }) + // P95 & P99 can only be calculated accurately on full buckets + .filter(dropPartialBuckets(dropPartialBucketsOptions)) + ); } if (aggType === Aggregators.AVERAGE) { - buckets - .map((bucket) => ({ - key: bucket.key_as_string ?? bucket.from_as_string, - value: bucket.aggregatedValue?.value ?? null, - })) - .filter(dropPartialBuckets(dropPartialBucketsOptions)); + return ( + buckets + .map((bucket) => ({ + key: bucket.key_as_string ?? bucket.from_as_string, + value: bucket.aggregatedValue?.value ?? null, + })) + // AVG can only be calculated accurately on full buckets + .filter(dropPartialBuckets(dropPartialBucketsOptions)) + ); } return buckets.map((bucket) => ({ diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 834697148abaf..6889a1ea9c08b 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -38,11 +38,17 @@ export const getElasticsearchMetricQuery = ( const intervalAsMS = intervalAsSeconds * 1000; const to = timeframe.end; const from = timeframe.start; - const offset = calculateDateHistogramOffset({ from, to, interval, field: timefield }); - const offsetInMS = - [Aggregators.P95, Aggregators.P99, Aggregators.AVERAGE].indexOf(aggType) > -1 - ? parseInt(offset, 10) - : 0; + let offset = '0ms'; + let offsetInMS = 0; + + if ( + [Aggregators.P95, Aggregators.P99, Aggregators.AVERAGE, Aggregators.RATE].indexOf(aggType) > -1 + ) { + // Only calculate offset for aggs that require a full bucket to evaluate + offset = calculateDateHistogramOffset({ from, to, interval, field: timefield }); + } + + offsetInMS = parseInt(offset, 10); const aggregations = aggType === Aggregators.COUNT diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts index 0dda299704d52..3fa10b81971c4 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts @@ -245,9 +245,9 @@ describe('The metric threshold alert type', () => { }, }); test('alerts based on the doc_count value instead of the aggregatedValue', async () => { - await execute(Comparator.GT, [2]); + await execute(Comparator.GT, [0.9]); expect(mostRecentAction(instanceID).id).toBe(FIRED_ACTIONS.id); - await execute(Comparator.LT, [1.5]); + await execute(Comparator.LT, [0.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); }); From 6087cfe613b15bfd823cb7171bd56f09ff1b8de6 Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Wed, 28 Jul 2021 12:03:13 -0500 Subject: [PATCH 4/8] Remove all offset logic for date_range aggs --- .../metric_threshold/lib/evaluate_alert.ts | 41 ++++++++++--------- .../metric_threshold/lib/metric_query.ts | 17 ++------ .../metric_threshold_executor.test.ts | 4 +- 3 files changed, 26 insertions(+), 36 deletions(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts index 29728859d585a..28ba7e0778140 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts @@ -235,29 +235,30 @@ const getValuesFromAggregations = ( })); } if (aggType === Aggregators.P95 || aggType === Aggregators.P99) { - return ( - buckets - .map((bucket) => { - const values = bucket.aggregatedValue?.values || []; - const firstValue = first(values); - if (!firstValue) return null; - return { key: bucket.from_as_string, value: firstValue.value }; - }) - // P95 & P99 can only be calculated accurately on full buckets - .filter(dropPartialBuckets(dropPartialBucketsOptions)) - ); + return buckets.map((bucket) => { + const values = bucket.aggregatedValue?.values || []; + const firstValue = first(values); + if (!firstValue) return null; + return { key: bucket.from_as_string, value: firstValue.value }; + }); } if (aggType === Aggregators.AVERAGE) { - return ( - buckets - .map((bucket) => ({ - key: bucket.key_as_string ?? bucket.from_as_string, - value: bucket.aggregatedValue?.value ?? null, - })) - // AVG can only be calculated accurately on full buckets - .filter(dropPartialBuckets(dropPartialBucketsOptions)) - ); + return buckets.map((bucket) => ({ + key: bucket.key_as_string ?? bucket.from_as_string, + value: bucket.aggregatedValue?.value ?? null, + })); + // AVG can only be calculated accurately on full buckets + // .filter(dropPartialBuckets(dropPartialBucketsOptions)) + } + + if (aggType === Aggregators.RATE) { + return buckets + .map((bucket) => ({ + key: bucket.key_as_string ?? bucket.from_as_string, + value: bucket.aggregatedValue?.value ?? null, + })) + .filter(dropPartialBuckets(dropPartialBucketsOptions)); } return buckets.map((bucket) => ({ diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 6889a1ea9c08b..8ce4c65199330 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -38,17 +38,6 @@ export const getElasticsearchMetricQuery = ( const intervalAsMS = intervalAsSeconds * 1000; const to = timeframe.end; const from = timeframe.start; - let offset = '0ms'; - let offsetInMS = 0; - - if ( - [Aggregators.P95, Aggregators.P99, Aggregators.AVERAGE, Aggregators.RATE].indexOf(aggType) > -1 - ) { - // Only calculate offset for aggs that require a full bucket to evaluate - offset = calculateDateHistogramOffset({ from, to, interval, field: timefield }); - } - - offsetInMS = parseInt(offset, 10); const aggregations = aggType === Aggregators.COUNT @@ -72,7 +61,7 @@ export const getElasticsearchMetricQuery = ( date_histogram: { field: timefield, fixed_interval: interval, - offset, + offset: calculateDateHistogramOffset({ from, to, interval, field: timefield }), extended_bounds: { min: from, max: to, @@ -86,8 +75,8 @@ export const getElasticsearchMetricQuery = ( date_range: { field: timefield, ranges: Array.from(Array(Math.floor((to - from) / intervalAsMS)), (_, i) => ({ - from: from + intervalAsMS * i + offsetInMS, - to: from + intervalAsMS * (i + 1) + offsetInMS, + from: from + intervalAsMS * i, + to: from + intervalAsMS * (i + 1), })), }, aggregations, diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts index 3fa10b81971c4..fd1dc43c191fb 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts @@ -202,7 +202,7 @@ describe('The metric threshold alert type', () => { }); test('sends no alert when some, but not all, criteria cross the threshold', async () => { const instanceID = '*'; - await execute(Comparator.LT_OR_EQ, [1.0], [3.0]); + await execute(Comparator.LT_OR_EQ, [1.0], [2.5]); expect(mostRecentAction(instanceID)).toBe(undefined); }); test('alerts only on groups that meet all criteria when querying with a groupBy parameter', async () => { @@ -221,7 +221,7 @@ describe('The metric threshold alert type', () => { expect(reasons[0]).toContain('test.metric.1'); expect(reasons[1]).toContain('test.metric.2'); expect(reasons[0]).toContain('current value is 1'); - expect(reasons[1]).toContain('current value is 3.5'); + expect(reasons[1]).toContain('current value is 3'); expect(reasons[0]).toContain('threshold of 1'); expect(reasons[1]).toContain('threshold of 3'); }); From fa48ef88e1a67ca1a8f231aa99faaec391f43677 Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Wed, 28 Jul 2021 12:17:57 -0500 Subject: [PATCH 5/8] Remove code comment --- .../server/lib/alerting/metric_threshold/lib/evaluate_alert.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts index 28ba7e0778140..8b951e35b4d39 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts @@ -248,8 +248,6 @@ const getValuesFromAggregations = ( key: bucket.key_as_string ?? bucket.from_as_string, value: bucket.aggregatedValue?.value ?? null, })); - // AVG can only be calculated accurately on full buckets - // .filter(dropPartialBuckets(dropPartialBucketsOptions)) } if (aggType === Aggregators.RATE) { From bbee8bee46b0d06fba7e1437151bad1ff53455a7 Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Wed, 28 Jul 2021 13:00:06 -0500 Subject: [PATCH 6/8] Add delivery delay --- .../alerting/metric_threshold/lib/metric_query.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 8ce4c65199330..1a8b28ed53f28 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -39,6 +39,8 @@ export const getElasticsearchMetricQuery = ( const to = timeframe.end; const from = timeframe.start; + const deliveryDelay = 60 * 1000; // INFO: This allows us to account for any delay ES has in indexing the most recent data. + const aggregations = aggType === Aggregators.COUNT ? {} @@ -74,10 +76,12 @@ export const getElasticsearchMetricQuery = ( aggregatedIntervals: { date_range: { field: timefield, - ranges: Array.from(Array(Math.floor((to - from) / intervalAsMS)), (_, i) => ({ - from: from + intervalAsMS * i, - to: from + intervalAsMS * (i + 1), - })), + ranges: [ + { + from: from - intervalAsMS, + to: from + intervalAsMS - deliveryDelay, + }, + ], }, aggregations, }, From a47ba2804c6a2249ff48e3516ff520d7a216144a Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Wed, 28 Jul 2021 13:47:07 -0500 Subject: [PATCH 7/8] Fix the date range for query --- .../server/lib/alerting/metric_threshold/lib/metric_query.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts index 1a8b28ed53f28..cde84b217be95 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/metric_query.ts @@ -78,8 +78,8 @@ export const getElasticsearchMetricQuery = ( field: timefield, ranges: [ { - from: from - intervalAsMS, - to: from + intervalAsMS - deliveryDelay, + from: to - intervalAsMS - deliveryDelay, + to: to - deliveryDelay, }, ], }, From 59524a854ca551907a9968839da3ae8ae925e11f Mon Sep 17 00:00:00 2001 From: Phillip Burch Date: Wed, 28 Jul 2021 14:13:40 -0500 Subject: [PATCH 8/8] Add TODO --- .../server/lib/alerting/metric_threshold/lib/evaluate_alert.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts index 8b951e35b4d39..d3fa983ff9e84 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/lib/evaluate_alert.ts @@ -126,8 +126,10 @@ const getMetric: ( .add(1, timeUnit) .startOf(timeUnit) .valueOf(); + // We need enough data for 5 buckets worth of data. We also need // to convert the intervalAsSeconds to milliseconds. + // TODO: We only need to get 5 buckets for the rate query, so this logic should move there. const minimumFrom = to - intervalAsMS * MINIMUM_BUCKETS; const from = roundTimestamp(