Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions spartan/metrics/terraform/grafana.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# See https://registry.terraform.io/providers/grafana/grafana/latest/docs

terraform {
  # Providers this configuration depends on.
  required_providers {
    grafana = {
      # "~> 3.13.2" accepts 3.13.x patch releases at or above 3.13.2.
      version = "~> 3.13.2"
      source  = "grafana/grafana"
    }
  }
}

# Grafana provider; the endpoint and credential are supplied through input variables.
provider "grafana" {
  auth = var.grafana_auth
  url  = var.grafana_url
}

# Folder whose uid is used by the alert rule group in this file.
resource "grafana_folder" "rule_folder" {
  title = "Alerting Rules"
}


# Contact point named "slack"; the destination URL comes from var.slack_url.
resource "grafana_contact_point" "slack" {
  name = "slack"

  slack {
    url = var.slack_url
  }
}

resource "grafana_notification_policy" "ignore_policy" {

@just-mitch just-mitch Nov 26, 2024

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the service_namespace is smoke, we never alert.

Otherwise, it goes to slack.

contact_point = grafana_contact_point.slack.name
group_by = ["service_namespace"]

policy {
  # Nested route for alerts whose service_namespace label equals "smoke".
  contact_point = grafana_contact_point.slack.name

  matcher {
    label = "service_namespace"
    match = "="
    value = "smoke"
  }

  # Reference the mute timing resource instead of repeating its name as a
  # string literal: this gives Terraform an explicit dependency, so the mute
  # timing is created before this policy and the two names cannot drift apart.
  mute_timings = [grafana_mute_timing.mute_timing_always.name]
}
}

# Mute timing named "always".
#
# Review note carried over from the pull-request discussion: a reviewer asked
# whether the empty `intervals` block reads as "mute always" or whether an
# explicit interval is needed. The author confirmed that "mute always" is the
# intent, and that the only notification policy using this timing is the one
# matching service_namespace = "smoke".
resource "grafana_mute_timing" "mute_timing_always" {
  name = "always"

  # Intentionally empty: no restriction is placed on when the mute applies.
  intervals {}
}

# Alert rule group stored in the "Alerting Rules" folder, with
# interval_seconds = 60.
resource "grafana_rule_group" "rule_group_minutely" {
  org_id           = 1
  name             = "minutely-evaluation-group"
  folder_uid       = grafana_folder.rule_folder.uid
  interval_seconds = 60

  # Fires when the proven block height has increased by less than 1 over the
  # last 30 minutes (query A fed into threshold expression B), held for 1m.
  rule {
    name      = "Proven Chain is Live"
    condition = "B" # ref_id of the expression that decides the alert state

    # A: instant Prometheus query for the 30m increase of the proven height.
    data {
      ref_id = "A"

      relative_time_range {
        from = 600
        to   = 0
      }

      datasource_uid = "spartan-metrics-prometheus"
      model = jsonencode({
        disableTextWrap     = false,
        editorMode          = "code",
        expr                = "increase(aztec_archiver_block_height{aztec_status=\"proven\"}[30m])",
        fullMetaSearch      = false,
        includeNullMetadata = true,
        instant             = true,
        intervalMs          = 1000,
        legendFormat        = "__auto",
        maxDataPoints       = 43200,
        range               = false,
        refId               = "A",
        useBackend          = false
      })
    }

    # B: threshold expression over A; true when the value is less than 1.
    data {
      ref_id = "B"

      relative_time_range {
        from = 600
        to   = 0
      }

      datasource_uid = "__expr__"
      model = jsonencode(
        {
          conditions = [
            {
              evaluator = { params = [1], type = "lt" },
              operator  = { type = "and" },
              # Kept in sync with this expression's own ref_id (was "C",
              # which does not exist in this rule).
              query   = { params = ["B"] },
              reducer = { params = [], type = "last" },
              type    = "query"
            }
          ],
          datasource    = { type = "__expr__", uid = "__expr__" },
          expression    = "A",
          intervalMs    = 1000,
          maxDataPoints = 43200,
          # Must match ref_id above and the rule's `condition`; previously "C".
          refId = "B",
          type  = "threshold"
        }
      )
    }

    no_data_state  = "NoData"
    exec_err_state = "Error"
    for            = "1m"
    annotations    = {}
    labels         = {}
    is_paused      = false
  }
}
11 changes: 11 additions & 0 deletions spartan/metrics/terraform/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# URL handed to the Grafana provider's `url` argument.
variable "grafana_url" {
  type = string
}

variable "grafana_auth" {
  description = "Credential passed to the Grafana provider's `auth` argument."
  type        = string

  # Credential: keep the value out of plan/apply output.
  sensitive = true
}

variable "slack_url" {
  description = "URL used by the \"slack\" Grafana contact point."
  type        = string

  # Anyone holding this URL can post to the channel; keep it out of plan/apply output.
  sensitive = true
}