From f6580075c1165049420906c081deeafc883b9da2 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Sat, 21 Oct 2017 12:09:54 +0200 Subject: [PATCH] Keep cumulative elapsed scroll time in microseconds Today we internally accumulate elapsed scroll time in nanoseconds. The problem here is that this can reasonably overflow. For example, on a system with scrolls that are open for ten minutes on average, after sixteen million scrolls the largest value that can be represented by a long will be executed. To address this, we switch to internally representing scrolls using microseconds as this enables with the same number of scrolls scrolls that are open for seven days on average, or with the same average elapsed time sixteen billion scrolls which will never happen (executing one scroll a second until sixteen billion have executed would not occur until more than five-hundred years had elapsed). --- .../index/search/stats/ShardSearchStats.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java b/core/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java index 37c7b13ec79c9..636977029107e 100644 --- a/core/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java +++ b/core/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java @@ -180,12 +180,19 @@ public void onNewScrollContext(SearchContext context) { public void onFreeScrollContext(SearchContext context) { totalStats.scrollCurrent.dec(); assert totalStats.scrollCurrent.count() >= 0; - totalStats.scrollMetric.inc(System.nanoTime() - context.getOriginNanoTime()); + totalStats.scrollMetric.inc(TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - context.getOriginNanoTime())); } static final class StatsHolder { public final MeanMetric queryMetric = new MeanMetric(); public final MeanMetric fetchMetric = new MeanMetric(); + /* We store scroll statistics in microseconds because with nanoseconds we run the risk of overflowing the total stats if there are + * many scrolls. For example, on a system with 2^24 scrolls that have been executed, each executing for 2^10 seconds, then using + * nanoseconds would require a numeric representation that can represent at least 2^24 * 2^10 * 10^9 > 2^24 * 2^10 * 2^29 = 2^63 + * which exceeds the largest value that can be represented by a long. By using microseconds, we enable capturing one-thousand + * times as many scrolls (i.e., billions of scrolls which at one per second would take 32 years to occur), or scrolls that execute + * for one-thousand times as long (i.e., scrolls that execute for almost twelve days on average). + */ public final MeanMetric scrollMetric = new MeanMetric(); public final MeanMetric suggestMetric = new MeanMetric(); public final CounterMetric queryCurrent = new CounterMetric(); @@ -197,7 +204,7 @@ public SearchStats.Stats stats() { return new SearchStats.Stats( queryMetric.count(), TimeUnit.NANOSECONDS.toMillis(queryMetric.sum()), queryCurrent.count(), fetchMetric.count(), TimeUnit.NANOSECONDS.toMillis(fetchMetric.sum()), fetchCurrent.count(), - scrollMetric.count(), TimeUnit.NANOSECONDS.toMillis(scrollMetric.sum()), scrollCurrent.count(), + scrollMetric.count(), TimeUnit.MICROSECONDS.toMillis(scrollMetric.sum()), scrollCurrent.count(), suggestMetric.count(), TimeUnit.NANOSECONDS.toMillis(suggestMetric.sum()), suggestCurrent.count() ); }