Skip to content

Commit

Permalink
Merge branch 'main' into NR-89203
Browse files Browse the repository at this point in the history
  • Loading branch information
mdumpati authored Sep 27, 2023
2 parents c5997e9 + 9dbbbc7 commit 6dfd79f
Show file tree
Hide file tree
Showing 31 changed files with 6,594 additions and 144 deletions.
35 changes: 35 additions & 0 deletions alert-policies/adobe-commerce-business-insights/5xxErrors.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: 5xx Server Errors

# The description names both thresholds so it agrees with the terms below
# (CRITICAL above 10, WARNING above 5, each with a 300 second duration).
description: |+
  This alert is triggered if the count of 5xx server errors stays above 10 (critical) or above 5 (warning) for 5 minutes.
type: STATIC
nrql:
  # Counts Transaction events whose httpResponseCode starts with "5".
  query: "SELECT count(*) as '5xx Server Errors' from Transaction WHERE httpResponseCode LIKE '5%'"

# Function used to aggregate the NRQL query value(s) for comparison to the terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# List of Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Operator used to compare against the threshold.
    operator: ABOVE
    # Value that triggers a violation
    threshold: 10
    # Time in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points must be in violation for the duration
    thresholdOccurrences: ALL
  - priority: WARNING
    # Operator used to compare against the threshold.
    operator: ABOVE
    # Value that triggers a violation
    threshold: 5
    # Time in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points must be in violation for the duration
    thresholdOccurrences: ALL

# Duration after which a violation automatically closes
# Time in seconds; 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
35 changes: 35 additions & 0 deletions alert-policies/adobe-commerce-business-insights/cpuUsage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: CPU Usage (%)

description: |
  This alert is triggered if CPU usage exceeds 90% for 5 minutes.
type: STATIC
nrql:
  # Folded scalar: the two lines below are joined with a single space.
  # NOTE(review): the query has no FACET or WHERE clause, so it presumably
  # yields one value across all reporting hosts - confirm this is intended.
  query: >-
    SELECT latest(host.cpuPercent) AS 'CPU Used %'
    FROM Metric

# How the NRQL result is reduced before it is compared with
# terms.threshold (Default: SINGLE_VALUE).
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for this condition.
terms:
  # Critical: value above 90 for every data point over 300 seconds.
  - priority: CRITICAL
    operator: ABOVE
    threshold: 90
    # Seconds; allowed range 120 - 3600.
    thresholdDuration: 300
    # Number of data points that must violate during the duration.
    thresholdOccurrences: ALL
  # Warning: value above 80 for every data point over 300 seconds.
  - priority: WARNING
    operator: ABOVE
    threshold: 80
    # Seconds; allowed range 120 - 3600.
    thresholdDuration: 300
    # Number of data points that must violate during the duration.
    thresholdOccurrences: ALL

# A violation closes automatically after this many seconds.
# Allowed range 300 - 2592000 (Default: 86400 [1 day]).
violationTimeLimitSeconds: 86400
35 changes: 35 additions & 0 deletions alert-policies/adobe-commerce-business-insights/downtime.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Downtime (%)

description: |
  This alert is triggered if Downtime is more than 1% for 2 minutes.
type: STATIC
nrql:
  # Folded scalar: the two lines below are joined with a single space.
  # Percentage of SyntheticCheck results equal to 'FAILED'.
  query: >-
    SELECT percentage(count(result), where result = 'FAILED') as 'Downtime (%)'
    from SyntheticCheck

# How the NRQL result is reduced before it is compared with
# terms.threshold (Default: SINGLE_VALUE).
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for this condition.
terms:
  # Critical: value above 1 for every data point over 120 seconds.
  - priority: CRITICAL
    operator: ABOVE
    threshold: 1
    # Seconds; allowed range 120 - 3600.
    thresholdDuration: 120
    # Number of data points that must violate during the duration.
    thresholdOccurrences: ALL
  # Warning: value above 0.5 for every data point over 120 seconds.
  - priority: WARNING
    operator: ABOVE
    threshold: 0.5
    # Seconds; allowed range 120 - 3600.
    thresholdDuration: 120
    # Number of data points that must violate during the duration.
    thresholdOccurrences: ALL

# A violation closes automatically after this many seconds.
# Allowed range 300 - 2592000 (Default: 86400 [1 day]).
violationTimeLimitSeconds: 86400
35 changes: 35 additions & 0 deletions alert-policies/adobe-commerce-business-insights/memoryUsage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Memory Usage (%)

description: |
  This alert is triggered if Memory usage exceeds 90% for 5 minutes.
type: STATIC
nrql:
  # Folded scalar: the two lines below are joined with a single space.
  # NOTE(review): the query has no FACET or WHERE clause, so it presumably
  # yields one value across all reporting hosts - confirm this is intended.
  query: >-
    SELECT latest(host.memoryUsedPercent) as 'Memory Used %'
    FROM Metric

# How the NRQL result is reduced before it is compared with
# terms.threshold (Default: SINGLE_VALUE).
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for this condition.
terms:
  # Critical: value above 90 for every data point over 300 seconds.
  - priority: CRITICAL
    operator: ABOVE
    threshold: 90
    # Seconds; allowed range 120 - 3600.
    thresholdDuration: 300
    # Number of data points that must violate during the duration.
    thresholdOccurrences: ALL
  # Warning: value above 80 for every data point over 300 seconds.
  - priority: WARNING
    operator: ABOVE
    threshold: 80
    # Seconds; allowed range 120 - 3600.
    thresholdDuration: 300
    # Number of data points that must violate during the duration.
    thresholdOccurrences: ALL

# A violation closes automatically after this many seconds.
# Allowed range 300 - 2592000 (Default: 86400 [1 day]).
violationTimeLimitSeconds: 86400
37 changes: 37 additions & 0 deletions alert-policies/vertica/DiskUsed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: Disk Space Used (%)

description: |+
  This alert is triggered when disk space usage exceeds 90% for 5 minutes.
type: STATIC
nrql:
  # The source attribute reports FREE space, so it is subtracted from 100 to
  # get the USED percentage that the name, description and ABOVE thresholds
  # below refer to. Comparing the free percentage directly against ABOVE 90
  # would fire on nearly empty disks instead of nearly full ones.
  # NOTE(review): assumes disk_space_free_percent is a numeric value in the
  # range 0 - 100 - confirm against the data reported in VerticaDiskStorage.
  query: "SELECT 100 - latest(disk_space_free_percent) as 'Disk Space Used (%)' FROM VerticaDiskStorage"

# Function used to aggregate the NRQL query value(s) for comparison to the terms.threshold (Default: SINGLE_VALUE).
valueFunction: SINGLE_VALUE

# List of Critical and Warning thresholds for the condition.
terms:
  - priority: CRITICAL
    # Operator used to compare against the threshold.
    operator: ABOVE
    # Value that triggers a violation.
    threshold: 90
    # Time in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points must be in violation for the duration?
    thresholdOccurrences: ALL

  - priority: WARNING
    # Operator used to compare against the threshold.
    operator: ABOVE
    # Value that triggers a violation.
    threshold: 85
    # Time in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points must be in violation for the duration?
    thresholdOccurrences: ALL

# Duration after which a violation automatically closes.
# Time in seconds; 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400

Loading

0 comments on commit 6dfd79f

Please sign in to comment.