Skip to content

Commit

Permalink
Merge branch 'release' into revert-2409-release
Browse files Browse the repository at this point in the history
  • Loading branch information
nr-mlosier authored Jun 6, 2024
2 parents 78b1051 + c884592 commit 4ec5dd6
Show file tree
Hide file tree
Showing 35 changed files with 654 additions and 1,944 deletions.
65 changes: 0 additions & 65 deletions alert-policies/istio/static-alert.yml

This file was deleted.

40 changes: 40 additions & 0 deletions alert-policies/linkerd/ExcessTCPConnections.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Alert name
name: Excess TCP Connections

# Human-readable summary of the condition
description: |
  This alert is triggered when TCP open connections exceeds 150 for 5 minutes.

# Alert type
type: STATIC

# NRQL query evaluated by the condition
nrql:
  query: 'SELECT max(tcp_open_connections) FROM Metric'

# Aggregation applied to the NRQL query value(s) before they are compared
# with terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for the condition
terms:
  # Critical: value above 180 for the whole 300-second window
  - priority: CRITICAL
    # Comparison applied against the threshold
    operator: ABOVE
    # Value that triggers a violation
    threshold: 180
    # Window length in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points in the window must be in violation
    thresholdOccurrences: ALL
  # Warning: value above 150 for the whole 300-second window
  - priority: WARNING
    # Comparison applied against the threshold
    operator: ABOVE
    # Value that triggers a violation
    threshold: 150
    # Window length in seconds; 120 - 3600
    # (Baseline conditions also require a multiple of 60; this one is STATIC)
    thresholdDuration: 300
    # How many data points in the window must be in violation
    thresholdOccurrences: ALL

# Seconds after which an open violation closes automatically;
# 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
41 changes: 41 additions & 0 deletions alert-policies/linkerd/HighResponseLatency.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Alert name
name: Response Latency Bucket

# Human-readable summary of the condition
description: |
  This alert is triggered when response latency exceeds 150 ms for 5 minutes.

# Alert type
type: STATIC

# NRQL query evaluated by the condition (95th percentile of the latency metric)
nrql:
  query: 'SELECT percentile(`response_latency_ms_bucket`, 95) FROM Metric'

# Aggregation applied to the NRQL query value(s) before they are compared
# with terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for the condition
terms:
  # Critical: value above 200 for the whole 300-second window
  - priority: CRITICAL
    # Comparison applied against the threshold
    operator: ABOVE
    # Value that triggers a violation
    threshold: 200
    # Window length in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points in the window must be in violation
    thresholdOccurrences: ALL
  # Warning: value above 150 for the whole 300-second window
  - priority: WARNING
    # Comparison applied against the threshold
    operator: ABOVE
    # Value that triggers a violation
    threshold: 150
    # Window length in seconds; 120 - 3600
    # (Baseline conditions also require a multiple of 60; this one is STATIC)
    thresholdDuration: 300
    # How many data points in the window must be in violation
    thresholdOccurrences: ALL

# Seconds after which an open violation closes automatically;
# 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
41 changes: 41 additions & 0 deletions alert-policies/linkerd/ProcessThreadsCountAlert.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Alert name
name: Process Threads Count

# Human-readable summary of the condition
description: |
  This alert is triggered when the number of process threads exceeds 4 for 5 minutes.

# Alert type
type: STATIC

# NRQL query evaluated by the condition (average of the process_threads metric)
nrql:
  query: 'SELECT average(`process_threads`) AS `Thread Count Spike` FROM Metric'

# Aggregation applied to the NRQL query value(s) before they are compared
# with terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for the condition
terms:
  # Critical: value above 6 for the whole 300-second window
  - priority: CRITICAL
    # Comparison applied against the threshold
    operator: ABOVE
    # Value that triggers a violation
    threshold: 6
    # Window length in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points in the window must be in violation
    thresholdOccurrences: ALL
  # Warning: value above 4 for the whole 300-second window
  - priority: WARNING
    # Comparison applied against the threshold
    operator: ABOVE
    # Value that triggers a violation
    threshold: 4
    # Window length in seconds; 120 - 3600
    # (Baseline conditions also require a multiple of 60; this one is STATIC)
    thresholdDuration: 300
    # How many data points in the window must be in violation
    thresholdOccurrences: ALL

# Seconds after which an open violation closes automatically;
# 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
66 changes: 0 additions & 66 deletions dashboards/algorithmia/algorithmia.json

This file was deleted.

Binary file removed dashboards/algorithmia/algorithmia.png
Binary file not shown.
Loading

0 comments on commit 4ec5dd6

Please sign in to comment.