# Patch summary (descriptive preamble placed before the first `diff --git` header; not part of any hunk).
#
# Purpose: replace the third-party Terraform module "lorenzoaiello/rds-alarms/aws" (version 2.2.0)
# with CloudWatch alarm resources declared directly in the aws_oidc template.
#
# cloud/aws/templates/aws_oidc/alarms.tf (new file)
#   Adds seven aws_cloudwatch_metric_alarm resources in the "AWS/RDS" namespace:
#     cpu_utilization_too_high         CPUUtilization    GreaterThanThreshold
#     cpu_credit_balance_too_low       CPUCreditBalance  LessThanThreshold
#     disk_queue_depth_too_high        DiskQueueDepth    GreaterThanThreshold
#     disk_free_storage_space_too_low  FreeStorageSpace  LessThanThreshold
#     disk_burst_balance_too_low       BurstBalance      LessThanThreshold
#     memory_freeable_too_low          FreeableMemory    LessThanThreshold
#     memory_swap_usage_too_high       SwapUsage         GreaterThanThreshold
#   Every alarm uses statistic "Average", takes evaluation_periods and period from
#   var.rds_alarm_evaluation_period / var.rds_alarm_statistic_period, and is dimensioned on
#   DBInstanceIdentifier = data.aws_db_instance.civiform.id.
#   Each resource is gated by `count` on its var.rds_create_*_alarm flag. The CPU credit alarm
#   is additionally created only when var.postgres_instance_class matches the regex "(t2|t3|t4)".
#
# cloud/aws/templates/aws_oidc/main.tf
#   Removes the module "aws-rds-alarms" block.
#   NOTE(review): the removed block passed var.rds_create_anomaly_alarm and
#   var.rds_max_used_transaction_ids_high_threshold; none of the added resources reference
#   them. Their declarations are not visible in this patch - confirm whether they are still needed.
#   NOTE(review): none of the added alarms set alarm_actions or ok_actions - confirm that
#   alarms without notification targets are intended.
#
# cloud/aws/templates/aws_oidc/variables.tf
#   Changes the default from false to true for rds_create_high_cpu_alarm,
#   rds_create_high_queue_depth_alarm, rds_create_low_disk_space_alarm and
#   rds_create_low_memory_alarm. rds_create_low_cpu_credit_alarm keeps default = false and
#   only has its description extended.
#
# NOTE(review): the hunk headers below carry line counts (for example "+1,121"), but the patch
# text appears to have had its line breaks and indentation collapsed into spaces. It presumably
# needs to be regenerated from the repository before it can be applied - TODO confirm.
diff --git a/cloud/aws/templates/aws_oidc/alarms.tf b/cloud/aws/templates/aws_oidc/alarms.tf new file mode 100644 index 00000000..57906772 --- /dev/null +++ b/cloud/aws/templates/aws_oidc/alarms.tf @@ -0,0 +1,121 @@ +// CPU Utilization +resource "aws_cloudwatch_metric_alarm" "cpu_utilization_too_high" { + count = var.rds_create_high_cpu_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highCPUUtilization" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "CPUUtilization" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_max_cpu_utilization_threshold + alarm_description = "Average database CPU utilization is too high." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "cpu_credit_balance_too_low" { + count = var.rds_create_low_cpu_credit_alarm ? length(regexall("(t2|t3|t4)", var.postgres_instance_class)) > 0 ? 1 : 0 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowCPUCreditBalance" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "CPUCreditBalance" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_low_cpu_credit_balance_threshold + alarm_description = "Average database CPU credit balance is too low, a negative performance impact is imminent. When this alarm triggers, the database [instance class](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.DBInstanceClass.html) should be increased." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +// Disk Utilization +resource "aws_cloudwatch_metric_alarm" "disk_queue_depth_too_high" { + count = var.rds_create_high_queue_depth_alarm ? 
1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highDiskQueueDepth" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "DiskQueueDepth" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_disk_queue_depth_high_threshold + alarm_description = "Average database disk queue depth is too high, performance may be negatively impacted." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "disk_free_storage_space_too_low" { + count = var.rds_create_low_disk_space_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowFreeStorageSpace" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "FreeStorageSpace" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_disk_free_storage_low_threshold + alarm_description = "Average database free storage space is too low and may fill up soon." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "disk_burst_balance_too_low" { + count = var.rds_create_low_disk_burst_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowEBSBurstBalance" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "BurstBalance" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_disk_burst_balance_low_threshold + alarm_description = "Average database storage burst balance is too low, a negative performance impact is imminent." 
+ + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +// Memory Utilization +resource "aws_cloudwatch_metric_alarm" "memory_freeable_too_low" { + count = var.rds_create_low_memory_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowFreeableMemory" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "FreeableMemory" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_low_memory_threshold + alarm_description = "Average database freeable memory is too low, performance may be negatively impacted." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "memory_swap_usage_too_high" { + count = var.rds_create_swap_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highSwapUsage" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "SwapUsage" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_high_swap_usage_threshold + alarm_description = "Average database swap usage is too high, performance may be negatively impacted." 
+ + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} diff --git a/cloud/aws/templates/aws_oidc/main.tf b/cloud/aws/templates/aws_oidc/main.tf index c81b5ee1..65997c7a 100644 --- a/cloud/aws/templates/aws_oidc/main.tf +++ b/cloud/aws/templates/aws_oidc/main.tf @@ -63,31 +63,6 @@ data "aws_db_instance" "civiform" { ] } -module "aws-rds-alarms" { - source = "lorenzoaiello/rds-alarms/aws" - version = "2.2.0" - db_instance_id = data.aws_db_instance.civiform.id - db_instance_class = var.postgres_instance_class - evaluation_period = var.rds_alarm_evaluation_period - statistic_period = var.rds_alarm_statistic_period - create_high_cpu_alarm = var.rds_create_high_cpu_alarm - cpu_utilization_too_high_threshold = var.rds_max_cpu_utilization_threshold - create_high_queue_depth_alarm = var.rds_create_high_queue_depth_alarm - disk_queue_depth_too_high_threshold = var.rds_disk_queue_depth_high_threshold - create_low_disk_space_alarm = var.rds_create_low_disk_space_alarm - disk_free_storage_space_too_low_threshold = var.rds_disk_free_storage_low_threshold - create_low_memory_alarm = var.rds_create_low_memory_alarm - memory_freeable_too_low_threshold = var.rds_low_memory_threshold - create_low_cpu_credit_alarm = var.rds_create_low_cpu_credit_alarm - cpu_credit_balance_too_low_threshold = var.rds_low_cpu_credit_balance_threshold - create_low_disk_burst_alarm = var.rds_create_low_disk_burst_alarm - disk_burst_balance_too_low_threshold = var.rds_disk_burst_balance_low_threshold - create_swap_alarm = var.rds_create_swap_alarm - memory_swap_usage_too_high_threshold = var.rds_high_swap_usage_threshold - create_anomaly_alarm = var.rds_create_anomaly_alarm - maximum_used_transaction_ids_too_high_threshold = var.rds_max_used_transaction_ids_high_threshold -} - module "email_service" { for_each = toset([ var.sender_email_address, diff --git a/cloud/aws/templates/aws_oidc/variables.tf b/cloud/aws/templates/aws_oidc/variables.tf index f3723e7a..c379717f 100644 --- 
a/cloud/aws/templates/aws_oidc/variables.tf +++ b/cloud/aws/templates/aws_oidc/variables.tf @@ -151,7 +151,7 @@ variable "rds_alarm_statistic_period" { variable "rds_create_high_cpu_alarm" { type = bool description = "Whether or not to create a high CPU alarm for RDS." - default = false + default = true } variable "rds_max_cpu_utilization_threshold" { @@ -163,7 +163,7 @@ variable "rds_max_cpu_utilization_threshold" { variable "rds_create_high_queue_depth_alarm" { type = bool description = "Whether or not to create a high queue depth alarm for RDS." - default = false + default = true } variable "rds_disk_queue_depth_high_threshold" { @@ -175,7 +175,7 @@ variable "rds_disk_queue_depth_high_threshold" { variable "rds_create_low_disk_space_alarm" { type = bool description = "Whether or not to create a low disk space alarm for RDS." - default = false + default = true } variable "rds_disk_free_storage_low_threshold" { @@ -187,7 +187,7 @@ variable "rds_disk_free_storage_low_threshold" { variable "rds_create_low_memory_alarm" { type = bool description = "Whether or not to create a low memory free alarm for RDS." - default = false + default = true } variable "rds_low_memory_threshold" { @@ -198,7 +198,7 @@ variable "rds_low_memory_threshold" { variable "rds_create_low_cpu_credit_alarm" { type = bool - description = "Whether or not to create a low CPU credit alarm for RDS." + description = "Whether or not to create a low CPU credit alarm for RDS. This alarm type only applies for T-type database instances." default = false }