Skip to content

Commit

Permalink
Merge branch 'release' into trm/azure-dashboards
Browse files Browse the repository at this point in the history
  • Loading branch information
caylahamann authored Feb 2, 2024
2 parents d17268b + 1c8d620 commit 2184d93
Show file tree
Hide file tree
Showing 27 changed files with 3,264 additions and 69 deletions.
32 changes: 32 additions & 0 deletions alert-policies/hardware-sentry/battery-charge.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Alert condition: a host's battery charge has dropped too low.
name: Battery Charge

description: |+
  This alert is triggered when a host battery charge falls too low.
type: STATIC
nrql:
  # Latest battery charge reading, one signal per site/host pair.
  query: "FROM Metric SELECT latest(hw.battery.charge) FACET site, host.name"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: one critical term and one warning term.
terms:
  - priority: CRITICAL
    # A violation occurs while the value is under the threshold.
    operator: BELOW
    # NOTE(review): presumably a 0-1 ratio (i.e. 30%) - confirm against the metric.
    threshold: 0.3
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
  - priority: WARNING
    # A violation occurs while the value is under the threshold.
    operator: BELOW
    # NOTE(review): presumably a 0-1 ratio (i.e. 50%) - confirm against the metric.
    threshold: 0.5
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
22 changes: 22 additions & 0 deletions alert-policies/hardware-sentry/connector-status.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Alert condition: a connector is reporting the 'degraded' state.
name: Connector Status
description: |+
  This alert is triggered when a connector becomes degraded.
type: STATIC
nrql:
  # Latest connector status restricted to state = 'degraded',
  # one signal per site/host pair.
  query: "FROM Metric SELECT latest(hardware_sentry.connector.status) WHERE state = 'degraded' FACET site, host.name"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: a single critical term.
terms:
  - priority: CRITICAL
    # A violation occurs while the value matches the threshold exactly.
    operator: EQUALS
    # The value that counts as a violation.
    threshold: 1
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
23 changes: 23 additions & 0 deletions alert-policies/hardware-sentry/device-status(crit).yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Alert condition: a device (or its battery) is reporting the 'failed' state.
name: Device Status (critical)

description: |+
  This alert is triggered when a device is failing.
type: STATIC
nrql:
  # Latest hw.status where either the device state or the battery state
  # is 'failed', one signal per site/host pair.
  query: "FROM Metric SELECT latest(hw.status) WHERE state IN ('failed') OR battery.state = 'failed' FACET site, host.name"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: a single critical term.
terms:
  - priority: CRITICAL
    # A violation occurs while the value matches the threshold exactly.
    operator: EQUALS
    # The value that counts as a violation.
    threshold: 1
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
23 changes: 23 additions & 0 deletions alert-policies/hardware-sentry/device-status(warn).yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Alert condition: a device (or its battery) is reporting a warning
# ('degraded') state. Companion to the "Device Status (critical)" condition,
# which watches for the 'failed' state with the same query shape.
name: Device Status (warning)

description: |+
  This alert is triggered when a device is in a warning state.
type: STATIC
nrql:
  # Previously this selected latest(hw.battery.charge), copied from the
  # battery-charge condition, so the alert fired on a charge value of 1
  # instead of on device status. It now follows the critical condition's
  # query, filtering on the 'degraded' state.
  # NOTE(review): 'degraded' is assumed to be the warning-level value of
  # hw.status state / battery.state - confirm against the metric's attributes.
  query: "FROM Metric SELECT latest(hw.status) WHERE state IN ('degraded') OR battery.state = 'degraded' FACET site, host.name"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: a single warning term.
terms:
  - priority: WARNING
    # A violation occurs while the value matches the threshold exactly.
    operator: EQUALS
    # The value that counts as a violation.
    threshold: 1
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
22 changes: 22 additions & 0 deletions alert-policies/hardware-sentry/host-nonresponsive.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Alert condition: a monitored host has stopped responding.
name: Host not responding
description: |+
  This alert is triggered when a host is not responding.
type: STATIC
nrql:
  # Latest value of the host "up" metric, one signal per host id.
  query: "SELECT latest(hardware_sentry.host.up) FROM Metric FACET host.id"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: a single critical term.
terms:
  - priority: CRITICAL
    # A violation occurs while the value matches the threshold exactly.
    operator: EQUALS
    # The value that counts as a violation.
    threshold: 0
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
22 changes: 22 additions & 0 deletions alert-policies/hardware-sentry/missing-devices.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Alert condition: a device that should be present is no longer detected.
# The name and description used to be copied from the physical-disk
# endurance condition, which made two conditions share one name and
# misdescribed what this query checks.
name: Missing devices
description: |+
  This alert is triggered when a device is missing (no longer reported as present).
type: STATIC
nrql:
  # Latest hw.status restricted to state = 'present', one signal per host.
  query: "SELECT latest(hw.status) FROM Metric WHERE state = 'present' FACET host.name"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: a single critical term.
terms:
  - priority: CRITICAL
    # A violation occurs while the value matches the threshold exactly.
    operator: EQUALS
    # 0 on the 'present' state is treated as the violation value.
    threshold: 0
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
31 changes: 31 additions & 0 deletions alert-policies/hardware-sentry/physical-disk-cap.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Alert condition: physical disk endurance has fallen under the thresholds.
# NOTE(review): the description says "utilization is very low" while the
# terms fire BELOW 0.02 / 0.05 - presumably the metric tracks remaining
# endurance as a 0-1 ratio; confirm and reword the description if so.
name: Physical disk endurance capacity
description: |+
  This alert is triggered when a disk endurance utilization is very low.
type: STATIC
nrql:
  # Latest endurance utilization reading, one signal per host id.
  query: "SELECT latest(hw.physical_disk.endurance_utilization) FROM Metric FACET host.id"

# How the NRQL result is reduced before being compared with each
# term's threshold (SINGLE_VALUE is the default).
valueFunction: SINGLE_VALUE

# Thresholds for this condition: one critical term and one warning term.
terms:
  - priority: CRITICAL
    # A violation occurs while the value is under the threshold.
    operator: BELOW
    # The value that counts as a violation.
    threshold: 0.02
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
  - priority: WARNING
    # A violation occurs while the value is under the threshold.
    operator: BELOW
    # The value that counts as a violation.
    threshold: 0.05
    # Seconds the violation must last; allowed range is 120 - 3600.
    thresholdDuration: 300
    # Every data point within the duration must be in violation.
    thresholdOccurrences: ALL
Loading

0 comments on commit 2184d93

Please sign in to comment.