Skip to content

Commit

Permalink
Merge branch 'main' into NR-164846
Browse files Browse the repository at this point in the history
  • Loading branch information
RamanaReddy8801 authored Oct 18, 2023
2 parents 5b0d99a + 4851223 commit ce9e2e4
Show file tree
Hide file tree
Showing 46 changed files with 1,650 additions and 219 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: High Account Provisioned Read Capacity Utilization

description: |+
  This alert is triggered when the Account Provisioned Read Capacity Utilization is above 90%.
type: STATIC
nrql:
  query: "SELECT average(`aws.cassandra.AccountProvisionedReadCapacityUtilization`) FROM Metric"

# How the NRQL result is reduced to one value before it is compared
# with terms.threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition; only a Critical term is defined.
terms:
  - priority: CRITICAL
    # A violation requires the query value to be ABOVE the threshold.
    operator: ABOVE
    # Utilization percentage that triggers a violation.
    threshold: 90
    # Evaluation window in seconds (allowed range: 120 - 3600).
    thresholdDuration: 300
    # ALL: every data point in the window must be in violation.
    thresholdOccurrences: ALL

# Open violations close automatically after this many seconds
# (allowed range: 300 - 2592000; default 86400, i.e. 1 day).
violationTimeLimitSeconds: 86400
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: High Account Provisioned Write Capacity Utilization

description: |+
  This alert is triggered when the Account Provisioned Write Capacity Utilization is above 90%.
type: STATIC
nrql:
  query: "SELECT average(`aws.cassandra.AccountProvisionedWriteCapacityUtilization`) FROM Metric"

# How the NRQL result is reduced to one value before it is compared
# with terms.threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition; only a Critical term is defined.
terms:
  - priority: CRITICAL
    # A violation requires the query value to be ABOVE the threshold.
    operator: ABOVE
    # Utilization percentage that triggers a violation.
    threshold: 90
    # Evaluation window in seconds (allowed range: 120 - 3600).
    thresholdDuration: 300
    # ALL: every data point in the window must be in violation.
    thresholdOccurrences: ALL

# Open violations close automatically after this many seconds
# (allowed range: 300 - 2592000; default 86400, i.e. 1 day).
violationTimeLimitSeconds: 86400
27 changes: 27 additions & 0 deletions alert-policies/amazon-keyspaces/HighSystemErrors.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: High System Errors

# The description states the behaviour configured below: the query value
# must stay above the threshold for the whole thresholdDuration
# (thresholdOccurrences: ALL), not merely total more than 10 in the window.
description: |+
  This alert is triggered when System Errors stay above 10 for 10 minutes.
type: STATIC
nrql:
  # NOTE(review): count() presumably tallies reported data points rather
  # than adding up their values - confirm that sum() is not what is wanted.
  query: "SELECT count(`aws.cassandra.SystemErrors`) FROM Metric"

# Function used to aggregate the NRQL query value(s) for comparison to the terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# List of Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Operator used to compare against the threshold.
    operator: ABOVE
    # Value that triggers a violation
    threshold: 10
    # Time in seconds; 120 - 3600
    thresholdDuration: 600
    # How many data points must be in violation for the duration
    # (ALL: every data point in the 600 s window).
    thresholdOccurrences: ALL

# Duration after which a violation automatically closes
# Time in seconds; 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
27 changes: 27 additions & 0 deletions alert-policies/amazon-keyspaces/HighUserErrors.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: High User Errors

# The description states the behaviour configured below: the query value
# must stay above the threshold for the whole thresholdDuration
# (thresholdOccurrences: ALL), not merely total more than 10 in the window.
description: |+
  This alert is triggered when User Errors stay above 10 for 10 minutes.
type: STATIC
nrql:
  # NOTE(review): count() presumably tallies reported data points rather
  # than adding up their values - confirm that sum() is not what is wanted.
  query: "SELECT count(`aws.cassandra.UserErrors`) FROM Metric"

# Function used to aggregate the NRQL query value(s) for comparison to the terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# List of Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Operator used to compare against the threshold.
    operator: ABOVE
    # Value that triggers a violation
    threshold: 10
    # Time in seconds; 120 - 3600
    thresholdDuration: 600
    # How many data points must be in violation for the duration
    # (ALL: every data point in the 600 s window).
    thresholdOccurrences: ALL

# Duration after which a violation automatically closes
# Time in seconds; 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
34 changes: 34 additions & 0 deletions alert-policies/amazon-transfer-family/InboundFailedMessage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: High Inbound Failed Message

description: |+
  This alert is triggered if the number of Inbound Failed Messages exceeds 20 for 10 minutes.
type: STATIC
nrql:
  query: "SELECT sum(`aws.transfer.InboundFailedMessage`) as 'Query' FROM Metric"

# How the NRQL result is reduced to one value before it is compared
# with terms.threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: one Critical term and one Warning term.
terms:
  - priority: CRITICAL
    # A violation requires the query value to be ABOVE the threshold.
    operator: ABOVE
    # Value that triggers a Critical violation.
    threshold: 20
    # Evaluation window in seconds (allowed range: 120 - 3600).
    thresholdDuration: 600
    # ALL: every data point in the window must be in violation.
    thresholdOccurrences: ALL

  # The Warning term is optional; it uses a lower threshold over the
  # same window.
  - priority: WARNING
    operator: ABOVE
    threshold: 10
    thresholdDuration: 600
    thresholdOccurrences: ALL

# Open violations close automatically after this many seconds
# (allowed range: 300 - 2592000; default 86400, i.e. 1 day).
violationTimeLimitSeconds: 86400
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: High Partial Upload Executions Failed

description: |+
  This alert is triggered if the number of Partial Upload Execution Failure exceeds 20 for 10 minutes.
type: STATIC
nrql:
  query: "SELECT sum(`aws.transfer.OnPartialUploadExecutionsFailed`) as 'Query' FROM Metric"

# How the NRQL result is reduced to one value before it is compared
# with terms.threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: one Critical term and one Warning term.
terms:
  - priority: CRITICAL
    # A violation requires the query value to be ABOVE the threshold.
    operator: ABOVE
    # Value that triggers a Critical violation.
    threshold: 20
    # Evaluation window in seconds (allowed range: 120 - 3600).
    thresholdDuration: 600
    # ALL: every data point in the window must be in violation.
    thresholdOccurrences: ALL

  # The Warning term is optional; it uses a lower threshold over the
  # same window.
  - priority: WARNING
    operator: ABOVE
    threshold: 10
    thresholdDuration: 600
    thresholdOccurrences: ALL

# Open violations close automatically after this many seconds
# (allowed range: 300 - 2592000; default 86400, i.e. 1 day).
violationTimeLimitSeconds: 86400
34 changes: 34 additions & 0 deletions alert-policies/amazon-transfer-family/OnUploadExecutionsFailed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: High Upload Executions Failed

description: |+
  This alert is triggered if the number of Upload Execution Failure exceeds 20 for 10 minutes.
type: STATIC
nrql:
  query: "SELECT sum(`aws.transfer.OnUploadExecutionsFailed`) as 'Query' FROM Metric"

# How the NRQL result is reduced to one value before it is compared
# with terms.threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: one Critical term and one Warning term.
terms:
  - priority: CRITICAL
    # A violation requires the query value to be ABOVE the threshold.
    operator: ABOVE
    # Value that triggers a Critical violation.
    threshold: 20
    # Evaluation window in seconds (allowed range: 120 - 3600).
    thresholdDuration: 600
    # ALL: every data point in the window must be in violation.
    thresholdOccurrences: ALL

  # The Warning term is optional; it uses a lower threshold over the
  # same window.
  - priority: WARNING
    operator: ABOVE
    threshold: 10
    thresholdDuration: 600
    thresholdOccurrences: ALL

# Open violations close automatically after this many seconds
# (allowed range: 300 - 2592000; default 86400, i.e. 1 day).
violationTimeLimitSeconds: 86400
Loading

0 comments on commit ce9e2e4

Please sign in to comment.