Skip to content
This repository was archived by the owner on May 1, 2024. It is now read-only.

Commit 6d86c4f

Browse files
committed
prevent showing recovered message if no alert has been shown
1 parent d54749f commit 6d86c4f

File tree

2 files changed

+82
-10
lines changed

2 files changed

+82
-10
lines changed

cmd_worker.go

+79-10
Original file line numberDiff line numberDiff line change
@@ -298,29 +298,33 @@ func (p *workerCmd) notify(testDefinition test.Test, uniqueHash *string, resultE
298298
firstErrorTime := p.getMinDurationFirstErrorTime(hash)
299299
testResult.FirstErrorTime = firstErrorTime
300300

301+
expireDuration := *testDefinition.MinDuration * time.Duration(testDefinition.MinDurationCacheFactor)
302+
301303
if testResult.Error != nil {
302304

303305
// With minDuration, we don't want to trigger the notification, unless the test has failed for a long enough amount of time
304306
if firstErrorTime != nil {
305307
diffFirstError := now.Unix() - *firstErrorTime
306308

307-
// We want to keep the ORIGINAL error time as first error, just extend the expiry time
308-
p.setMinDurationFirstErrorTime(hash, *firstErrorTime, *testDefinition.MinDuration*time.Duration(testDefinition.MinDurationCacheFactor))
309+
// We want to keep the ORIGINAL error time as alert shown, just extend the expiry time
310+
p.setMinDurationFirstErrorTime(hash, *firstErrorTime, expireDuration)
309311

310312
if diffFirstError < minDurationSeconds {
311313
// There is no need to trigger the notification, because not enough time has passed since the error got fired
312-
p.verbose(fmt.Sprintf("Skipping notification (minDuration, first error %s ago) for test `%s` (%s)\n",
314+
p.verbose(fmt.Sprintf("Skipping notification (minDuration, alert shown %s ago) for test `%s` (%s)\n",
313315
time.Duration(diffFirstError)*time.Second,
314316
testDefinition.Input, testDefinition.Target))
315317
return nil
316318
}
317319

318320
// Let's show the alert!
321+
// Mark it as shown
322+
p.setMinDurationAlertShown(hash, true, expireDuration)
319323

320324
} else {
321-
p.setMinDurationFirstErrorTime(hash, now.Unix(), *testDefinition.MinDuration*time.Duration(testDefinition.MinDurationCacheFactor))
325+
p.setMinDurationFirstErrorTime(hash, now.Unix(), expireDuration)
322326

323-
// Do not throw alert here, as it is the first error
327+
// Do not throw alert here, because this is the first generated alert
324328
return nil
325329
}
326330

@@ -330,19 +334,31 @@ func (p *workerCmd) notify(testDefinition test.Test, uniqueHash *string, resultE
330334
if firstErrorTime != nil {
331335
diffFirstError := now.Unix() - *firstErrorTime
332336

337+
alertShown := p.getMinDurationAlertShown(hash)
338+
333339
// Clear the cache because test got resolved
334340
p.clearMinDurationFirstErrorTime(hash)
341+
p.clearMinDurationAlertShown(hash)
335342
testResult.Recovered = true
336343

337344
// If the error has never been triggered, because min duration did not expire, do not trigger a recovered message
338345
if diffFirstError < minDurationSeconds {
339-
p.verbose(fmt.Sprintf("Test recovered (min duration cache cleared, skipping recovered message): `%s` (%s)\n",
346+
p.verbose(fmt.Sprintf("Test recovered (min duration not met, skipping recovered message): `%s` (%s)\n",
340347
testDefinition.Input, testDefinition.Target))
341348
return nil
342349
}
343350

344-
p.verbose(fmt.Sprintf("Test recovered (min duration cache cleared): `%s` (%s)\n",
351+
// Check if we want to show the recovered message
352+
if !alertShown {
353+
p.verbose(fmt.Sprintf("Test recovered (alert not shown, skipping recovered message): `%s` (%s)\n",
354+
testDefinition.Input, testDefinition.Target))
355+
356+
return nil
357+
}
358+
359+
p.verbose(fmt.Sprintf("Test recovered (min duration cache cleared, showing recovered message): `%s` (%s)\n",
345360
testDefinition.Input, testDefinition.Target))
361+
346362
} else {
347363

348364
/*
@@ -544,7 +560,7 @@ func (p *workerCmd) getMinDurationFirstErrorTime(hash string) *int64 {
544560
return nil
545561
}
546562

547-
fmt.Printf("Failed to get min-duration first error key: %s\n", err)
563+
fmt.Printf("Failed to get min-duration alert shown key: %s\n", err)
548564
return nil
549565
}
550566

@@ -559,7 +575,7 @@ func (p *workerCmd) setMinDurationFirstErrorTime(hash string, errorTime int64, e
559575
cacheKey := p.getMinDurationFirstErrorKey(hash)
560576
_, err := p._r.Set(cacheKey, errorTime, expiry).Result()
561577
if err != nil {
562-
fmt.Printf("Failed to set min-duration first error key: %s\n", err)
578+
fmt.Printf("Failed to set min-duration alert shown key: %s\n", err)
563579
}
564580
}
565581

@@ -571,7 +587,60 @@ func (p *workerCmd) clearMinDurationFirstErrorTime(hash string) {
571587
cacheKey := p.getMinDurationFirstErrorKey(hash)
572588
_, err := p._r.Del(cacheKey).Result()
573589
if err != nil {
574-
fmt.Printf("Failed to clear min-duration first error key: %s\n", err)
590+
fmt.Printf("Failed to clear min-duration alert shown key: %s\n", err)
591+
}
592+
}
593+
594+
func (p *workerCmd) getMinDurationAlertShownKey(hash string) string {
595+
return fmt.Sprintf("overseer.min-duration-alert-shown.%s", hash)
596+
}
597+
598+
func (p *workerCmd) getMinDurationAlertShown(hash string) bool {
599+
if p._r == nil {
600+
return false
601+
}
602+
603+
cacheKey := p.getMinDurationAlertShownKey(hash)
604+
alertShown, err := p._r.Get(cacheKey).Int64()
605+
if err != nil {
606+
if err == redis.Nil {
607+
// Key just does not exist
608+
return false
609+
}
610+
611+
fmt.Printf("Failed to get min-duration alert shown key: %s\n", err)
612+
return false
613+
}
614+
615+
return alertShown > 0
616+
}
617+
618+
func (p *workerCmd) setMinDurationAlertShown(hash string, shown bool, expiry time.Duration) {
619+
if p._r == nil {
620+
return
621+
}
622+
623+
alertShown := 0
624+
if shown {
625+
alertShown = 1
626+
}
627+
628+
cacheKey := p.getMinDurationAlertShownKey(hash)
629+
_, err := p._r.Set(cacheKey, alertShown, expiry).Result()
630+
if err != nil {
631+
fmt.Printf("Failed to set min-duration alert shown key: %s\n", err)
632+
}
633+
}
634+
635+
func (p *workerCmd) clearMinDurationAlertShown(hash string) {
636+
if p._r == nil {
637+
return
638+
}
639+
640+
cacheKey := p.getMinDurationAlertShownKey(hash)
641+
_, err := p._r.Del(cacheKey).Result()
642+
if err != nil {
643+
fmt.Printf("Failed to clear min-duration alert shown key: %s\n", err)
575644
}
576645
}
577646

scripts/test-run-enqueue-fail-min-duration.sh

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2
1111
dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2s with min-duration 3s with retries 0 with fail-at 0 with test-label "Should alert (3s have passed)"
1212
dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2s with min-duration 3s with retries 0 with fail-at 98 with test-label "Should recover"
1313
dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2s with min-duration 3s with retries 0 with fail-at 0 with test-label "No alert"
14+
dumb-test1 must run dumb-test with dumb-duration-min 5s with dumb-duration-max 5s with min-duration 3s with retries 0 with fail-at 99 with test-label "No recover message (alert not shown)"
15+
dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2s with min-duration 3s with retries 0 with fail-at 99 with test-label "No recover message"
16+
dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2s with min-duration 3s with retries 0 with fail-at 99 with test-label "No recover message"
1417
dumb-test1 must run dumb-test with dumb-duration-min 2s with dumb-duration-max 2s with min-duration 3s with retries 0 with fail-at 99 with test-label "No recover message"
1518
EOL
1619

0 commit comments

Comments
 (0)