From 030fe2231fb2a6a5d347dce8fcac54afbb765650 Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Thu, 22 Jun 2023 12:50:41 -0400 Subject: [PATCH 1/9] Default RDS alarms to false --- cloud/aws/templates/aws_oidc/main.tf | 1 - cloud/aws/templates/aws_oidc/variables.tf | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cloud/aws/templates/aws_oidc/main.tf b/cloud/aws/templates/aws_oidc/main.tf index d2c78095..6b8d872d 100644 --- a/cloud/aws/templates/aws_oidc/main.tf +++ b/cloud/aws/templates/aws_oidc/main.tf @@ -68,7 +68,6 @@ module "aws-rds-alarms" { version = "2.2.0" db_instance_id = data.aws_db_instance.civiform.id db_instance_class = var.postgres_instance_class - engine = "postgres" evaluation_period = var.rds_alarm_evaluation_period statistic_period = var.rds_alarm_statistic_period create_high_cpu_alarm = var.rds_create_high_cpu_alarm diff --git a/cloud/aws/templates/aws_oidc/variables.tf b/cloud/aws/templates/aws_oidc/variables.tf index d4de8607..f3723e7a 100644 --- a/cloud/aws/templates/aws_oidc/variables.tf +++ b/cloud/aws/templates/aws_oidc/variables.tf @@ -151,7 +151,7 @@ variable "rds_alarm_statistic_period" { variable "rds_create_high_cpu_alarm" { type = bool description = "Whether or not to create a high CPU alarm for RDS." - default = true + default = false } variable "rds_max_cpu_utilization_threshold" { @@ -163,7 +163,7 @@ variable "rds_max_cpu_utilization_threshold" { variable "rds_create_high_queue_depth_alarm" { type = bool description = "Whether or not to create a high queue depth alarm for RDS." - default = true + default = false } variable "rds_disk_queue_depth_high_threshold" { @@ -175,7 +175,7 @@ variable "rds_disk_queue_depth_high_threshold" { variable "rds_create_low_disk_space_alarm" { type = bool description = "Whether or not to create a low disk space alarm for RDS." - default = true + default = false } variable "rds_disk_free_storage_low_threshold" { @@ -187,7 +187,7 @@ variable "rds_disk_free_storage_low_threshold" { variable "rds_create_low_memory_alarm" { type = bool description = "Whether or not to create a low memory free alarm for RDS." - default = true + default = false } variable "rds_low_memory_threshold" { From f6391fb2d6498fa96c539b225267316927ef347d Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Mon, 17 Jul 2023 11:00:32 -0400 Subject: [PATCH 2/9] Add remaining variables for rds alarms --- cloud/aws/templates/aws_oidc/main.tf | 3 +++ .../aws_oidc/variable_definitions.json | 18 ++++++++++++++++++ cloud/aws/templates/aws_oidc/variables.tf | 18 ++++++++++++++++++ cloud/shared/bin/lib/config_loader.py | 16 +++++++++++++++- 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/cloud/aws/templates/aws_oidc/main.tf b/cloud/aws/templates/aws_oidc/main.tf index 6b8d872d..f03f9e8d 100644 --- a/cloud/aws/templates/aws_oidc/main.tf +++ b/cloud/aws/templates/aws_oidc/main.tf @@ -70,6 +70,9 @@ module "aws-rds-alarms" { db_instance_class = var.postgres_instance_class evaluation_period = var.rds_alarm_evaluation_period statistic_period = var.rds_alarm_statistic_period + actions_alarm = var.rds_alarm_triggered_actions + actions_ok = var.rds_alarm_cleared_actions + tags = var.rds_alarm_tags create_high_cpu_alarm = var.rds_create_high_cpu_alarm cpu_utilization_too_high_threshold = var.rds_max_cpu_utilization_threshold create_high_queue_depth_alarm = var.rds_create_high_queue_depth_alarm diff --git a/cloud/aws/templates/aws_oidc/variable_definitions.json b/cloud/aws/templates/aws_oidc/variable_definitions.json index d1291bbd..b98662af 100644 --- a/cloud/aws/templates/aws_oidc/variable_definitions.json +++ b/cloud/aws/templates/aws_oidc/variable_definitions.json @@ -209,6 +209,24 @@ "tfvar": true, "type": "string" }, + "RDS_ALARM_TRIGGERED_ACTIONS": { + "required": false, + "secret": false, + "tfvar": true, + "type": "list" + }, + "RDS_ALARM_CLEARED_ACTIONS": { + "required": false, + "secret": false, + "tfvar": true, + "type": "list" + }, + "RDS_ALARM_TAGS": { + "required": false, + "secret": false, + "tfvar": true, + "type": "dict" + }, "RDS_CREATE_HIGH_CPU_ALARM": { "required": false, "secret": false, diff --git a/cloud/aws/templates/aws_oidc/variables.tf b/cloud/aws/templates/aws_oidc/variables.tf index f3723e7a..e7983b5d 100644 --- a/cloud/aws/templates/aws_oidc/variables.tf +++ b/cloud/aws/templates/aws_oidc/variables.tf @@ -148,6 +148,24 @@ variable "rds_alarm_statistic_period" { default = "60" } +variable "rds_alarm_triggered_actions" { + type = list(any) + description = "A list of actions to take when rds alarms are triggered." + default = [] +} + +variable "rds_alarm_cleared_actions" { + type = list(any) + description = "A list of actions to take when rds alarms are cleared." + default = [] +} + +variable "rds_alarm_tags" { + type = map(string) + description = "Tags to attach to each rds alarm." + default = {} +} + variable "rds_create_high_cpu_alarm" { type = bool description = "Whether or not to create a high CPU alarm for RDS." diff --git a/cloud/shared/bin/lib/config_loader.py b/cloud/shared/bin/lib/config_loader.py index bbe34286..93c07782 100644 --- a/cloud/shared/bin/lib/config_loader.py +++ b/cloud/shared/bin/lib/config_loader.py @@ -280,7 +280,7 @@ def _validate_civiform_server_env_vars( f"'{name}' is required but not set") continue - # Variable types are 'string', 'int', 'bool', or 'index-list'. + # Variable types are 'string', 'int', 'bool', or 'index-list', 'list', 'dict'. if variable.type == "string": if variable.values is not None: if config_value not in variable.values: @@ -311,6 +311,20 @@ def _validate_civiform_server_env_vars( ) continue + if variable.type == "list": + if not isinstance(config_value, list): + validation_errors.append( + f"'{name}' is required to be a list, got {config_value}" + ) + continue + + if variable.type == "dict": + if not isinstance(config_value, dict): + validation_errors.append( + f"'{name}' is required to be a map, got {config_value}" + ) + continue + # TODO(#4612): Add support for validation of items in an index-list. # An Index-list variables VAR is represented as a comma-separated string. # Individual fields in VAR can NOT currently be set the same way as on the From a20ffc20100055bb89fb1d74fd7d2aad27e218a6 Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Mon, 17 Jul 2023 12:24:08 -0400 Subject: [PATCH 3/9] merge main --- cloud/aws/templates/aws_oidc/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/aws/templates/aws_oidc/variables.tf b/cloud/aws/templates/aws_oidc/variables.tf index e7983b5d..529bf686 100644 --- a/cloud/aws/templates/aws_oidc/variables.tf +++ b/cloud/aws/templates/aws_oidc/variables.tf @@ -169,7 +169,7 @@ variable "rds_alarm_tags" { variable "rds_create_high_cpu_alarm" { type = bool description = "Whether or not to create a high CPU alarm for RDS." - default = false + default = true } variable "rds_max_cpu_utilization_threshold" { From 9d67f42d4a6a415f7c8634ff38766ac38dc5161c Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:19:02 -0400 Subject: [PATCH 4/9] Update main.tf --- cloud/aws/templates/aws_oidc/main.tf | 72 ++++++++++++++++++---------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/cloud/aws/templates/aws_oidc/main.tf b/cloud/aws/templates/aws_oidc/main.tf index 9708069f..55cc95a0 100644 --- a/cloud/aws/templates/aws_oidc/main.tf +++ b/cloud/aws/templates/aws_oidc/main.tf @@ -63,34 +63,54 @@ data "aws_db_instance" "civiform" { ] } -module "aws-rds-alarms" { - source = "lorenzoaiello/rds-alarms/aws" - version = "2.2.0" - db_instance_id = data.aws_db_instance.civiform.id - db_instance_class = var.postgres_instance_class - evaluation_period = var.rds_alarm_evaluation_period - statistic_period = var.rds_alarm_statistic_period - actions_alarm = var.rds_alarm_triggered_actions - actions_ok = var.rds_alarm_cleared_actions - tags = var.rds_alarm_tags - create_high_cpu_alarm = var.rds_create_high_cpu_alarm - cpu_utilization_too_high_threshold = var.rds_max_cpu_utilization_threshold - create_high_queue_depth_alarm = var.rds_create_high_queue_depth_alarm - disk_queue_depth_too_high_threshold = var.rds_disk_queue_depth_high_threshold - create_low_disk_space_alarm = var.rds_create_low_disk_space_alarm - disk_free_storage_space_too_low_threshold = var.rds_disk_free_storage_low_threshold - create_low_memory_alarm = var.rds_create_low_memory_alarm - memory_freeable_too_low_threshold = var.rds_low_memory_threshold - create_low_cpu_credit_alarm = var.rds_create_low_cpu_credit_alarm - cpu_credit_balance_too_low_threshold = var.rds_low_cpu_credit_balance_threshold - create_low_disk_burst_alarm = var.rds_create_low_disk_burst_alarm - disk_burst_balance_too_low_threshold = var.rds_disk_burst_balance_low_threshold - create_swap_alarm = var.rds_create_swap_alarm - memory_swap_usage_too_high_threshold = var.rds_high_swap_usage_threshold - create_anomaly_alarm = var.rds_create_anomaly_alarm - maximum_used_transaction_ids_too_high_threshold = var.rds_max_used_transaction_ids_high_threshold +resource "aws_cloudwatch_metric_alarm" "cpu_utilization_too_high" { + count = var.rds_create_high_cpu_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highCPUUtilization" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "CPUUtilization" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_max_cpu_utilization_threshold + alarm_description = "Average database CPU utilization is too high." + alarm_actions = var.rds_alarm_triggered_actions + ok_actions = var.rds_alarm_cleared_actions + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } + tags = var.rds_alarm_tags } +#module "aws-rds-alarms" { +# source = "lorenzoaiello/rds-alarms/aws" +# version = "2.2.0" +# db_instance_id = data.aws_db_instance.civiform.id +# db_instance_class = var.postgres_instance_class +# evaluation_period = var.rds_alarm_evaluation_period +# statistic_period = var.rds_alarm_statistic_period +# actions_alarm = var.rds_alarm_triggered_actions +# actions_ok = var.rds_alarm_cleared_actions +# tags = var.rds_alarm_tags +# create_high_cpu_alarm = false +# cpu_utilization_too_high_threshold = var.rds_max_cpu_utilization_threshold +# create_high_queue_depth_alarm = var.rds_create_high_queue_depth_alarm +# disk_queue_depth_too_high_threshold = var.rds_disk_queue_depth_high_threshold +# create_low_disk_space_alarm = var.rds_create_low_disk_space_alarm +# disk_free_storage_space_too_low_threshold = var.rds_disk_free_storage_low_threshold +# create_low_memory_alarm = var.rds_create_low_memory_alarm +# memory_freeable_too_low_threshold = var.rds_low_memory_threshold +# create_low_cpu_credit_alarm = var.rds_create_low_cpu_credit_alarm +# cpu_credit_balance_too_low_threshold = var.rds_low_cpu_credit_balance_threshold +# create_low_disk_burst_alarm = var.rds_create_low_disk_burst_alarm +# disk_burst_balance_too_low_threshold = var.rds_disk_burst_balance_low_threshold +# create_swap_alarm = var.rds_create_swap_alarm +# memory_swap_usage_too_high_threshold = var.rds_high_swap_usage_threshold +# create_anomaly_alarm = var.rds_create_anomaly_alarm +# maximum_used_transaction_ids_too_high_threshold = var.rds_max_used_transaction_ids_high_threshold +#} + module "email_service" { for_each = toset([ var.sender_email_address, From f418d3ef3482863c04730056870dca52d262f26c Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:26:44 -0400 Subject: [PATCH 5/9] Add more alarm configs --- cloud/aws/templates/aws_oidc/alarms.tf | 170 +++++++++++++++++++++++++ cloud/aws/templates/aws_oidc/main.tf | 48 ------- 2 files changed, 170 insertions(+), 48 deletions(-) create mode 100644 cloud/aws/templates/aws_oidc/alarms.tf diff --git a/cloud/aws/templates/aws_oidc/alarms.tf b/cloud/aws/templates/aws_oidc/alarms.tf new file mode 100644 index 00000000..f54f91d6 --- /dev/null +++ b/cloud/aws/templates/aws_oidc/alarms.tf @@ -0,0 +1,170 @@ +resource "aws_cloudwatch_metric_alarm" "cpu_utilization_too_high" { + count = var.rds_create_high_cpu_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highCPUUtilization" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "CPUUtilization" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_max_cpu_utilization_threshold + alarm_description = "Average database CPU utilization is too high." + alarm_actions = var.rds_alarm_triggered_actions + ok_actions = var.rds_alarm_cleared_actions + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "cpu_credit_balance_too_low" { + count = var.rds_create_low_cpu_credit_alarm ? length(regexall("(t2|t3)", var.postgres_instance_class)) > 0 ? 1 : 0 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowCPUCreditBalance" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "CPUCreditBalance" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_low_cpu_credit_balance_threshold + alarm_description = "Average database CPU credit balance is too low, a negative performance impact is imminent." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +// Disk Utilization +resource "aws_cloudwatch_metric_alarm" "disk_queue_depth_too_high" { + count = var.rds_create_high_queue_depth_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highDiskQueueDepth" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "DiskQueueDepth" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_disk_queue_depth_high_threshold + alarm_description = "Average database disk queue depth is too high, performance may be negatively impacted." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "disk_free_storage_space_too_low" { + count = var.rds_create_low_disk_space_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowFreeStorageSpace" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "FreeStorageSpace" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_disk_free_storage_low_threshold + alarm_description = "Average database free storage space is too low and may fill up soon." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "disk_burst_balance_too_low" { + count = var.rds_create_low_disk_burst_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowEBSBurstBalance" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "BurstBalance" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_disk_burst_balance_low_threshold + alarm_description = "Average database storage burst balance is too low, a negative performance impact is imminent." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +// Memory Utilization +resource "aws_cloudwatch_metric_alarm" "memory_freeable_too_low" { + count = var.rds_create_low_memory_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-lowFreeableMemory" + comparison_operator = "LessThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "FreeableMemory" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_low_memory_threshold + alarm_description = "Average database freeable memory is too low, performance may be negatively impacted." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} + +resource "aws_cloudwatch_metric_alarm" "memory_swap_usage_too_high" { + count = var.rds_create_swap_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-highSwapUsage" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "SwapUsage" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_high_swap_usage_threshold + alarm_description = "Average database swap usage is too high, performance may be negatively impacted." + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } +} +# +#// Connection Count +#resource "aws_cloudwatch_metric_alarm" "connection_count_anomalous" { +# count = var.rds_create_anomaly_alarm ? 1 : 0 +# alarm_name = "rds-${data.aws_db_instance.civiform.id}-anomalousConnectionCount" +# comparison_operator = "GreaterThanUpperThreshold" +# evaluation_periods = var.rds_alarm_evaluation_period +# threshold_metric_id = "e1" +# alarm_description = "Anomalous database connection count detected. Something unusual is happening." +# +# metric_query { +# id = "e1" +# expression = "ANOMALY_DETECTION_BAND(m1, ${var.anomaly_band_width})" +# label = "DatabaseConnections (Expected)" +# return_data = "true" +# } +# +# metric_query { +# id = "m1" +# return_data = "true" +# metric { +# metric_name = "DatabaseConnections" +# namespace = "AWS/RDS" +# period = var.anomaly_period +# stat = "Average" +# unit = "Count" +# +# dimensions = { +# DBInstanceIdentifier = data.aws_db_instance.civiform.id +# } +# } +# } +#} +# +#// Early Warning System for Transaction ID Wraparound for postgres +#// more info - https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/ +#resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" { +# count = contains(["aurora-postgresql", "postgres"], var.engine) ? 1 : 0 +# alarm_name = "rds-${data.aws_db_instance.civiform.id}-maximumUsedTransactionIDs" +# comparison_operator = "GreaterThanThreshold" +# evaluation_periods = var.rds_alarm_evaluation_period +# metric_name = "MaximumUsedTransactionIDs" +# namespace = "AWS/RDS" +# period = var.rds_alarm_statistic_period +# statistic = "Average" +# threshold = var.rds_max_used_transaction_ids_high_threshold +# alarm_description = "Nearing a possible critical transaction ID wraparound." +#} \ No newline at end of file diff --git a/cloud/aws/templates/aws_oidc/main.tf b/cloud/aws/templates/aws_oidc/main.tf index 55cc95a0..65997c7a 100644 --- a/cloud/aws/templates/aws_oidc/main.tf +++ b/cloud/aws/templates/aws_oidc/main.tf @@ -63,54 +63,6 @@ data "aws_db_instance" "civiform" { ] } -resource "aws_cloudwatch_metric_alarm" "cpu_utilization_too_high" { - count = var.rds_create_high_cpu_alarm ? 1 : 0 - alarm_name = "rds-${data.aws_db_instance.civiform.id}-highCPUUtilization" - comparison_operator = "GreaterThanThreshold" - evaluation_periods = var.rds_alarm_evaluation_period - metric_name = "CPUUtilization" - namespace = "AWS/RDS" - period = var.rds_alarm_statistic_period - statistic = "Average" - threshold = var.rds_max_cpu_utilization_threshold - alarm_description = "Average database CPU utilization is too high." - alarm_actions = var.rds_alarm_triggered_actions - ok_actions = var.rds_alarm_cleared_actions - - dimensions = { - DBInstanceIdentifier = data.aws_db_instance.civiform.id - } - tags = var.rds_alarm_tags -} - -#module "aws-rds-alarms" { -# source = "lorenzoaiello/rds-alarms/aws" -# version = "2.2.0" -# db_instance_id = data.aws_db_instance.civiform.id -# db_instance_class = var.postgres_instance_class -# evaluation_period = var.rds_alarm_evaluation_period -# statistic_period = var.rds_alarm_statistic_period -# actions_alarm = var.rds_alarm_triggered_actions -# actions_ok = var.rds_alarm_cleared_actions -# tags = var.rds_alarm_tags -# create_high_cpu_alarm = false -# cpu_utilization_too_high_threshold = var.rds_max_cpu_utilization_threshold -# create_high_queue_depth_alarm = var.rds_create_high_queue_depth_alarm -# disk_queue_depth_too_high_threshold = var.rds_disk_queue_depth_high_threshold -# create_low_disk_space_alarm = var.rds_create_low_disk_space_alarm -# disk_free_storage_space_too_low_threshold = var.rds_disk_free_storage_low_threshold -# create_low_memory_alarm = var.rds_create_low_memory_alarm -# memory_freeable_too_low_threshold = var.rds_low_memory_threshold -# create_low_cpu_credit_alarm = var.rds_create_low_cpu_credit_alarm -# cpu_credit_balance_too_low_threshold = var.rds_low_cpu_credit_balance_threshold -# create_low_disk_burst_alarm = var.rds_create_low_disk_burst_alarm -# disk_burst_balance_too_low_threshold = var.rds_disk_burst_balance_low_threshold -# create_swap_alarm = var.rds_create_swap_alarm -# memory_swap_usage_too_high_threshold = var.rds_high_swap_usage_threshold -# create_anomaly_alarm = var.rds_create_anomaly_alarm -# maximum_used_transaction_ids_too_high_threshold = var.rds_max_used_transaction_ids_high_threshold -#} - module "email_service" { for_each = toset([ var.sender_email_address, From 89ba911c038214075c09ef559820207991b217b0 Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Tue, 18 Jul 2023 12:39:17 -0400 Subject: [PATCH 6/9] Add additional variables and alarms --- cloud/aws/templates/aws_oidc/alarms.tf | 96 +++++++++++------------ cloud/aws/templates/aws_oidc/variables.tf | 22 +++++- 2 files changed, 68 insertions(+), 50 deletions(-) diff --git a/cloud/aws/templates/aws_oidc/alarms.tf b/cloud/aws/templates/aws_oidc/alarms.tf index f54f91d6..945b5dd6 100644 --- a/cloud/aws/templates/aws_oidc/alarms.tf +++ b/cloud/aws/templates/aws_oidc/alarms.tf @@ -120,51 +120,51 @@ resource "aws_cloudwatch_metric_alarm" "memory_swap_usage_too_high" { DBInstanceIdentifier = data.aws_db_instance.civiform.id } } -# -#// Connection Count -#resource "aws_cloudwatch_metric_alarm" "connection_count_anomalous" { -# count = var.rds_create_anomaly_alarm ? 1 : 0 -# alarm_name = "rds-${data.aws_db_instance.civiform.id}-anomalousConnectionCount" -# comparison_operator = "GreaterThanUpperThreshold" -# evaluation_periods = var.rds_alarm_evaluation_period -# threshold_metric_id = "e1" -# alarm_description = "Anomalous database connection count detected. Something unusual is happening." -# -# metric_query { -# id = "e1" -# expression = "ANOMALY_DETECTION_BAND(m1, ${var.anomaly_band_width})" -# label = "DatabaseConnections (Expected)" -# return_data = "true" -# } -# -# metric_query { -# id = "m1" -# return_data = "true" -# metric { -# metric_name = "DatabaseConnections" -# namespace = "AWS/RDS" -# period = var.anomaly_period -# stat = "Average" -# unit = "Count" -# -# dimensions = { -# DBInstanceIdentifier = data.aws_db_instance.civiform.id -# } -# } -# } -#} -# -#// Early Warning System for Transaction ID Wraparound for postgres -#// more info - https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/ -#resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" { -# count = contains(["aurora-postgresql", "postgres"], var.engine) ? 1 : 0 -# alarm_name = "rds-${data.aws_db_instance.civiform.id}-maximumUsedTransactionIDs" -# comparison_operator = "GreaterThanThreshold" -# evaluation_periods = var.rds_alarm_evaluation_period -# metric_name = "MaximumUsedTransactionIDs" -# namespace = "AWS/RDS" -# period = var.rds_alarm_statistic_period -# statistic = "Average" -# threshold = var.rds_max_used_transaction_ids_high_threshold -# alarm_description = "Nearing a possible critical transaction ID wraparound." -#} \ No newline at end of file + +// Connection Count +resource "aws_cloudwatch_metric_alarm" "connection_count_anomalous" { + count = var.rds_create_anomaly_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-anomalousConnectionCount" + comparison_operator = "GreaterThanUpperThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + threshold_metric_id = "e1" + alarm_description = "Anomalous database connection count detected. Something unusual is happening." + + metric_query { + id = "e1" + expression = "ANOMALY_DETECTION_BAND(m1, ${var.rds_anomaly_bandwidth})" + label = "DatabaseConnections (Expected)" + return_data = "true" + } + + metric_query { + id = "m1" + return_data = "true" + metric { + metric_name = "DatabaseConnections" + namespace = "AWS/RDS" + period = var.rds_anomaly_period + stat = "Average" + unit = "Count" + + dimensions = { + DBInstanceIdentifier = data.aws_db_instance.civiform.id + } + } + } +} + +// Early Warning System for Transaction ID Wraparound for postgres +// more info - https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/ +resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" { + count = var.rds_create_transaction_id_wraparound_alarm ? 1 : 0 + alarm_name = "rds-${data.aws_db_instance.civiform.id}-maximumUsedTransactionIDs" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = var.rds_alarm_evaluation_period + metric_name = "MaximumUsedTransactionIDs" + namespace = "AWS/RDS" + period = var.rds_alarm_statistic_period + statistic = "Average" + threshold = var.rds_max_used_transaction_ids_high_threshold + alarm_description = "Nearing a possible critical transaction ID wraparound." +} diff --git a/cloud/aws/templates/aws_oidc/variables.tf b/cloud/aws/templates/aws_oidc/variables.tf index 529bf686..cad06855 100644 --- a/cloud/aws/templates/aws_oidc/variables.tf +++ b/cloud/aws/templates/aws_oidc/variables.tf @@ -193,7 +193,7 @@ variable "rds_disk_queue_depth_high_threshold" { variable "rds_create_low_disk_space_alarm" { type = bool description = "Whether or not to create a low disk space alarm for RDS." - default = false + default = true } variable "rds_disk_free_storage_low_threshold" { @@ -205,7 +205,7 @@ variable "rds_disk_free_storage_low_threshold" { variable "rds_create_low_memory_alarm" { type = bool description = "Whether or not to create a low memory free alarm for RDS." - default = false + default = true } variable "rds_low_memory_threshold" { @@ -256,6 +256,24 @@ variable "rds_create_anomaly_alarm" { default = false } +variable "rds_anomaly_bandwidth" { + type = string + description = "The width of the anomaly band, default 2. Higher numbers means less sensitive." + default = "2" +} + +variable "rds_anomaly_period" { + type = string + default = "600" + description = "The number of seconds that make each evaluation period for anomaly detection." +} + +variable "rds_create_transaction_id_wraparound_alarm" { + type = bool + description = "Whether or not to create a transaction ID wraparound alarm for postgres. More information can be found [here](https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/)." + default = false +} + variable "rds_max_used_transaction_ids_high_threshold" { type = string description = "The threshold for the maximum transaction IDS before the alarm gets triggered. This is to prevent [transaciton ID wraparound](https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/)" From d365b6ab5b089d0379ae8d374fae34692de89c7a Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Tue, 18 Jul 2023 12:51:33 -0400 Subject: [PATCH 7/9] updates --- .../aws_oidc/variable_definitions.json | 36 +++++++++---------- cloud/aws/templates/aws_oidc/variables.tf | 18 ---------- cloud/shared/bin/lib/config_loader.py | 16 +-------- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/cloud/aws/templates/aws_oidc/variable_definitions.json b/cloud/aws/templates/aws_oidc/variable_definitions.json index b98662af..60865376 100644 --- a/cloud/aws/templates/aws_oidc/variable_definitions.json +++ b/cloud/aws/templates/aws_oidc/variable_definitions.json @@ -209,24 +209,6 @@ "tfvar": true, "type": "string" }, - "RDS_ALARM_TRIGGERED_ACTIONS": { - "required": false, - "secret": false, - "tfvar": true, - "type": "list" - }, - "RDS_ALARM_CLEARED_ACTIONS": { - "required": false, - "secret": false, - "tfvar": true, - "type": "list" - }, - "RDS_ALARM_TAGS": { - "required": false, - "secret": false, - "tfvar": true, - "type": "dict" - }, "RDS_CREATE_HIGH_CPU_ALARM": { "required": false, "secret": false, @@ -323,6 +305,24 @@ "tfvar": true, "type": "bool" }, + "RDS_ANOMALY_BANDWIDTH": { + "required": false, + "secret": false, + "tfvar": true, + "type": "string" + }, + "RDS_ANOMALY_PERIOD": { + "required": false, + "secret": false, + "tfvar": true, + "type": "string" + }, + "RDS_CREATE_TRANSACTION_ID_WRAPAROUND_ALARM": { + "required": false, + "secret": false, + "tfvar": true, + "type": "bool" + }, "AWS_DB_STORAGE_TYPE": { "required": false, "secret": false, diff --git a/cloud/aws/templates/aws_oidc/variables.tf b/cloud/aws/templates/aws_oidc/variables.tf index 54288a2f..29ded39c 100644 --- a/cloud/aws/templates/aws_oidc/variables.tf +++ b/cloud/aws/templates/aws_oidc/variables.tf @@ -148,24 +148,6 @@ variable "rds_alarm_statistic_period" { default = "60" } -variable "rds_alarm_triggered_actions" { - type = list(any) - description = "A list of actions to take when rds alarms are triggered." - default = [] -} - -variable "rds_alarm_cleared_actions" { - type = list(any) - description = "A list of actions to take when rds alarms are cleared." - default = [] -} - -variable "rds_alarm_tags" { - type = map(string) - description = "Tags to attach to each rds alarm." - default = {} -} - variable "rds_create_high_cpu_alarm" { type = bool description = "Whether or not to create a high CPU alarm for RDS." diff --git a/cloud/shared/bin/lib/config_loader.py b/cloud/shared/bin/lib/config_loader.py index d179042b..ca8ceae3 100644 --- a/cloud/shared/bin/lib/config_loader.py +++ b/cloud/shared/bin/lib/config_loader.py @@ -291,7 +291,7 @@ def _validate_civiform_server_env_vars( f"'{name}' is required but not set") continue - # Variable types are 'string', 'int', 'bool', or 'index-list', 'list', 'dict'. + # Variable types are 'string', 'int', 'bool', or 'index-list'. # Validation for 'index-list' is not implemented at this time because # 'index-list' does not yet support subtyping. if variable.type == "string": @@ -324,20 +324,6 @@ def _validate_civiform_server_env_vars( ) continue - if variable.type == "list": - if not isinstance(config_value, list): - validation_errors.append( - f"'{name}' is required to be a list, got {config_value}" - ) - continue - - if variable.type == "dict": - if not isinstance(config_value, dict): - validation_errors.append( - f"'{name}' is required to be a map, got {config_value}" - ) - continue - return validation_errors def get_terraform_variables(self): From 696fa46ec48a2ba49d8b09790cbff83fe5f82739 Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Tue, 18 Jul 2023 13:16:50 -0400 Subject: [PATCH 8/9] Update alarms.tf --- cloud/aws/templates/aws_oidc/alarms.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cloud/aws/templates/aws_oidc/alarms.tf b/cloud/aws/templates/aws_oidc/alarms.tf index 9bfa9d96..4fe3825e 100644 --- a/cloud/aws/templates/aws_oidc/alarms.tf +++ b/cloud/aws/templates/aws_oidc/alarms.tf @@ -154,7 +154,6 @@ resource "aws_cloudwatch_metric_alarm" "connection_count_anomalous" { } // Early Warning System for Transaction ID Wraparound for postgres -// more info - https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/ resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" { count = var.rds_create_transaction_id_wraparound_alarm ? 1 : 0 alarm_name = "rds-${data.aws_db_instance.civiform.id}-maximumUsedTransactionIDs" @@ -165,6 +164,6 @@ resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" { period = var.rds_alarm_statistic_period statistic = "Average" threshold = var.rds_max_used_transaction_ids_high_threshold - alarm_description = "Nearing a possible critical transaction ID wraparound." + alarm_description = "Nearing a possible critical transaction ID wraparound. More info [here](https://aws.amazon.com/blogs/database/implement-an-early-warning-system-for-transaction-id-wraparound-in-amazon-rds-for-postgresql/)" } From 1862f6e7fb91d9112ff7dbc3864f6bc4454d6d49 Mon Sep 17 00:00:00 2001 From: dkatzz <86739416+dkatzz@users.noreply.github.com> Date: Wed, 19 Jul 2023 12:06:19 -0400 Subject: [PATCH 9/9] Update alarms.tf --- cloud/aws/templates/aws_oidc/alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/aws/templates/aws_oidc/alarms.tf b/cloud/aws/templates/aws_oidc/alarms.tf index 4fe3825e..f0118e31 100644 --- a/cloud/aws/templates/aws_oidc/alarms.tf +++ b/cloud/aws/templates/aws_oidc/alarms.tf @@ -127,7 +127,7 @@ resource "aws_cloudwatch_metric_alarm" "connection_count_anomalous" { comparison_operator = "GreaterThanUpperThreshold" evaluation_periods = var.rds_alarm_evaluation_period threshold_metric_id = "e1" - alarm_description = "Anomalous database connection count detected. Something unusual is happening." + alarm_description = "Anomalous database connection count detected. Check the monitoring graphs and logs for any suspicious activity." metric_query { id = "e1"