Skip to content

Commit 1979d78

Browse files
afharo and ycombinator authored
Cherry-pick to 7.9: [Metricbeat][Kibana] Apply backoff when errored at getting usage stats (#20772) (#21162)
Co-authored-by: Shaunak Kashyap <[email protected]> Co-authored-by: Shaunak Kashyap <[email protected]>
1 parent 2ab907f commit 1979d78

File tree

3 files changed

+52
-7
lines changed

3 files changed

+52
-7
lines changed

CHANGELOG.next.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
101101
- Fix storage metricset to allow config without region/zone. {issue}17623[17623] {pull}17624[17624]
102102
- Fix overflow on Prometheus rates when new buckets are added on the go. {pull}17753[17753]
103103
- Add a switch to the driver definition on SQL module to use pretty names {pull}17378[17378]
104+
- The Kibana collector applies a backoff after an error while getting usage stats. {pull}20772[20772]
104105
- The `elasticsearch/index` metricset only requests wildcard expansion for hidden indices if the monitored Elasticsearch cluster supports it. {pull}20938[20938]
105106
- Fix panic index out of range error when getting AWS account name. {pull}21101[21101] {issue}21095[21095]
106107
- Handle missing counters in the application_pool metricset. {pull}21071[21071]

metricbeat/module/kibana/stats/stats.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ func init() {
3838
}
3939

4040
const (
41-
statsPath = "api/stats"
42-
settingsPath = "api/settings"
43-
usageCollectionPeriod = 24 * time.Hour
41+
statsPath = "api/stats"
42+
settingsPath = "api/settings"
43+
usageCollectionPeriod = 24 * time.Hour
44+
usageCollectionBackoff = 1 * time.Hour
4445
)
4546

4647
var (
@@ -57,6 +58,7 @@ type MetricSet struct {
5758
statsHTTP *helper.HTTP
5859
settingsHTTP *helper.HTTP
5960
usageLastCollectedOn time.Time
61+
usageNextCollectOn time.Time
6062
isUsageExcludable bool
6163
}
6264

@@ -165,6 +167,10 @@ func (m *MetricSet) fetchStats(r mb.ReporterV2, now time.Time) error {
165167

166168
content, err = m.statsHTTP.FetchContent()
167169
if err != nil {
170+
if shouldCollectUsage {
171+
// When collecting the usage stats fails, retrying on the very next poll may be counterproductive; wait until usageCollectionBackoff has elapsed before trying to collect them again.
172+
m.usageNextCollectOn = now.Add(usageCollectionBackoff)
173+
}
168174
return err
169175
}
170176

@@ -215,5 +221,5 @@ func (m *MetricSet) calculateIntervalMs() int64 {
215221
}
216222

217223
func (m *MetricSet) shouldCollectUsage(now time.Time) bool {
218-
return now.Sub(m.usageLastCollectedOn) > usageCollectionPeriod
224+
return now.Sub(m.usageLastCollectedOn) > usageCollectionPeriod && now.Sub(m.usageNextCollectOn) > 0
219225
}

metricbeat/module/kibana/stats/stats_test.go

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"net/http"
2424
"net/http/httptest"
2525
"testing"
26+
"time"
2627

2728
"github.com/stretchr/testify/require"
2829

@@ -48,12 +49,12 @@ func TestFetchUsage(t *testing.T) {
4849
w.WriteHeader(503)
4950

5051
case 1: // second call
51-
// Make sure exclude_usage is still false since first call failed
52-
require.Equal(t, "false", excludeUsage)
52+
// Make sure exclude_usage is true since first call failed and it should not try again until usageCollectionBackoff time has passed
53+
require.Equal(t, "true", excludeUsage)
5354
w.WriteHeader(200)
5455

5556
case 2: // third call
56-
// Make sure exclude_usage is now true since second call succeeded
57+
// Make sure exclude_usage is still true
5758
require.Equal(t, "true", excludeUsage)
5859
w.WriteHeader(200)
5960
}
@@ -76,3 +77,40 @@ func TestFetchUsage(t *testing.T) {
7677
// Third fetch
7778
mbtest.ReportingFetchV2Error(f)
7879
}
80+
81+
func TestShouldCollectUsage(t *testing.T) {
82+
now := time.Now()
83+
84+
cases := map[string]struct {
85+
usageLastCollectedOn time.Time
86+
usageNextCollectOn time.Time
87+
expectedResult bool
88+
}{
89+
"within_usage_collection_period": {
90+
usageLastCollectedOn: now.Add(-1 * usageCollectionPeriod),
91+
expectedResult: false,
92+
},
93+
"after_usage_collection_period_but_before_next_scheduled_collection": {
94+
usageLastCollectedOn: now.Add(-2 * usageCollectionPeriod),
95+
usageNextCollectOn: now.Add(3 * time.Hour),
96+
expectedResult: false,
97+
},
98+
"after_usage_collection_period_and_after_next_scheduled_collection": {
99+
usageLastCollectedOn: now.Add(-2 * usageCollectionPeriod),
100+
usageNextCollectOn: now.Add(-1 * time.Hour),
101+
expectedResult: true,
102+
},
103+
}
104+
105+
for name, test := range cases {
106+
t.Run(name, func(t *testing.T) {
107+
m := MetricSet{
108+
usageLastCollectedOn: test.usageLastCollectedOn,
109+
usageNextCollectOn: test.usageNextCollectOn,
110+
}
111+
112+
actualResult := m.shouldCollectUsage(now)
113+
require.Equal(t, test.expectedResult, actualResult)
114+
})
115+
}
116+
}

0 commit comments

Comments
 (0)