diff --git a/.github/bundles/.gitignore b/.github/bundles/.gitignore new file mode 100644 index 0000000000..99264e2b94 --- /dev/null +++ b/.github/bundles/.gitignore @@ -0,0 +1 @@ +**/uds-config.yaml diff --git a/.github/bundles/aks/uds-config.yaml b/.github/bundles/aks/uds-config.yaml deleted file mode 100644 index 4ab2f6d069..0000000000 --- a/.github/bundles/aks/uds-config.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2024 Defense Unicorns -# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial - -# Overwritten in ci by uds-config.tf in test-infra/azure/aks -variables: - core: - azure_loki_storage_account: ${ZARF_VAR_AZURE_LOKI_STORAGE_ACCOUNT} - azure_loki_storage_account_access_key: ${ZARF_VAR_AZURE_LOKI_STORAGE_ACCOUNT_ACCESS_KEY} - azure_loki_storage_account_container: ${ZARF_VAR_AZURE_LOKI_STORAGE_ACCOUNT_CONTAINER} - azure_velero_storage_account: ${ZARF_VAR_AZURE_VELERO_STORAGE_ACCOUNT} - azure_velero_storage_account_acces_key: ${ZARF_VAR_AZURE_VELERO_STORAGE_ACCOUNT_ACCESS_KEY} - azure_velero_storage_account_container: ${ZARF_VAR_AZURE_VELERO_STORAGE_ACCOUNT_CONTAINER} - azure_subscription_id: ${ZARF_VAR_AZURE_SUBSCRIPTION_ID} - azure_resource_group: ${ZARF_VAR_AZURE_RESOURCE_GROUP} diff --git a/.github/bundles/eks/uds-config.yaml b/.github/bundles/eks/uds-config.yaml deleted file mode 100644 index 12adfdd490..0000000000 --- a/.github/bundles/eks/uds-config.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2024 Defense Unicorns -# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial - -# Overwritten by ci-iac-aws package -options: - architecture: amd64 - -variables: - core: - loki_chunks_bucket: ${ZARF_VAR_LOKI_S3_BUCKET} - loki_ruler_bucket: ${ZARF_VAR_LOKI_S3_BUCKET} - loki_admin_bucket: ${ZARF_VAR_LOKI_S3_BUCKET} - loki_s3_region: ${ZARF_VAR_LOKI_S3_AWS_REGION} - loki_irsa_role_arn: ${ZARF_VAR_LOKI_S3_ROLE_ARN} - velero_use_secret: false - velero_irsa_role_arn: "${ZARF_VAR_VELERO_S3_ROLE_ARN}" - velero_bucket: ${ZARF_VAR_VELERO_S3_BUCKET} - velero_bucket_region: ${ZARF_VAR_VELERO_S3_AWS_REGION} - velero_bucket_provider_url: "" - velero_bucket_credential_name: "" - velero_bucket_credential_key: "" - grafana_ha: true - grafana_pg_host: ${ZARF_VAR_GRAFANA_PG_HOST} - grafana_pg_port: ${ZARF_VAR_GRAFANA_PG_PORT} - grafana_pg_database: ${ZARF_VAR_GRAFANA_PG_DATABASE} - grafana_pg_password: ${ZARF_VAR_GRAFANA_PG_PASSWORD} - grafana_pg_user: ${ZARF_VAR_GRAFANA_PG_USER} diff --git a/.github/bundles/rke2/uds-config.yaml b/.github/bundles/rke2/uds-config.yaml deleted file mode 100644 index b55faf5a8d..0000000000 --- a/.github/bundles/rke2/uds-config.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2024 Defense Unicorns -# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial - -# Overwritten by ci-iac-aws package -options: - architecture: amd64 - -variables: - core: - loki_chunks_bucket: ${ZARF_VAR_LOKI_S3_BUCKET} - loki_ruler_bucket: ${ZARF_VAR_LOKI_S3_BUCKET} - loki_admin_bucket: ${ZARF_VAR_LOKI_S3_BUCKET} - loki_s3_region: ${ZARF_VAR_LOKI_S3_AWS_REGION} - loki_irsa_role_arn: ${ZARF_VAR_LOKI_S3_ROLE_ARN} - velero_use_secret: false - velero_irsa_role_arn: "${ZARF_VAR_VELERO_S3_ROLE_ARN}" - velero_bucket: ${ZARF_VAR_VELERO_S3_BUCKET} - velero_bucket_region: ${ZARF_VAR_VELERO_S3_AWS_REGION} - velero_bucket_provider_url: "" - velero_bucket_credential_name: "" - velero_bucket_credential_key: "" diff --git a/.github/test-infra/aws/eks/cluster.tf b/.github/test-infra/aws/eks/cluster.tf new file mode 100644 index 0000000000..d10d13f876 --- /dev/null +++ b/.github/test-infra/aws/eks/cluster.tf @@ -0,0 +1,154 @@ +# Copyright 2025 Defense Unicorns +# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial + + +# Create a custom launch template with public IP association +resource "aws_launch_template" "eks_node_group" { + name_prefix = "${var.name}-lt-" + + network_interfaces { + associate_public_ip_address = true + delete_on_termination = true + } + + tag_specifications { + resource_type = "instance" + tags = merge(local.tags, { + Name = "${var.name}-node" + }) + } + + lifecycle { + create_before_destroy = true + } +} + +# Create EKS Cluster +module "eks" { + source = "terraform-aws-modules/eks/aws" + version = "~> 20.35.0" + + cluster_name = var.name + cluster_version = var.kubernetes_version + cluster_endpoint_public_access = true + cluster_endpoint_private_access = false + + vpc_id = data.aws_vpc.vpc.id + subnet_ids = local.subnet_ids + + # IAM + iam_role_permissions_boundary = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.permissions_boundary_name}" + + # Add CloudWatch logging + cluster_enabled_log_types = [] + cloudwatch_log_group_retention_in_days = 0 + + # Authentication mode + authentication_mode = "API_AND_CONFIG_MAP" + + # Enable cluster creator admin permissions + enable_cluster_creator_admin_permissions = true + + # Security groups + create_cluster_security_group = true + create_node_security_group = true + node_security_group_enable_recommended_rules = true + node_security_group_additional_rules = { + clusterapi_ingress = { + description = "Cluster API Ingress on non-privileged ports" + protocol = "tcp" + from_port = 1025 + to_port = 65535 + type = "ingress" + source_cluster_security_group = true + } + } + + enable_security_groups_for_pods = false + + # Add tags to all resources + tags = local.tags + + # Node groups + eks_managed_node_groups = { + main = { + name = var.name + instance_types = [var.instance_type] + ami_type = "BOTTLEROCKET_x86_64_FIPS" + + min_size = var.node_group_min_size + max_size = var.node_group_max_size + desired_size = var.node_group_desired_size + + disk_size = var.node_disk_size + + # Let the module create the IAM role with permissions boundary + create_iam_role = true + iam_role_use_name_prefix = false + iam_role_name = "${substr(var.name, 0, 30)}-eks-node-role" + iam_role_permissions_boundary = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.permissions_boundary_name}" + + # Use our custom launch template that has public IP association + create_launch_template = false + launch_template_id = aws_launch_template.eks_node_group.id + launch_template_version = aws_launch_template.eks_node_group.latest_version + + # Add required policies for node functionality + iam_role_additional_policies = { + AmazonSSMManagedInstanceCore = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSSMManagedInstanceCore" + AmazonEBSCSIDriverPolicy = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy" + } + + tags = merge(local.tags, { + PermissionsBoundary = var.permissions_boundary_name + }) + } + } + + # EKS Addons + cluster_addons = { + vpc-cni = { + most_recent = true + configuration_values = jsonencode({ + enableNetworkPolicy = "true" + }) + } + aws-ebs-csi-driver = { + most_recent = true + } + kube-proxy = { + most_recent = true + } + coredns = { + most_recent = true + configuration_values = jsonencode({ + corefile = <<-EOT + .:53 { + errors + health { + lameduck 5s + } + ready + kubernetes cluster.local cluster.local in-addr.arpa ip6.arpa { + pods insecure + fallthrough in-addr.arpa ip6.arpa + ttl 30 + } + prometheus 0.0.0.0:9153 + forward . /etc/resolv.conf + cache 30 + loop + reload + loadbalance + rewrite stop { + name regex (.*\.admin\.uds\.dev) admin-ingressgateway.istio-admin-gateway.svc.cluster.local answer auto + } + rewrite stop { + name regex (.*\.uds\.dev) tenant-ingressgateway.istio-tenant-gateway.svc.cluster.local answer auto + } + } + EOT + }) + } + } +} diff --git a/.github/test-infra/aws/eks/data.tf b/.github/test-infra/aws/eks/data.tf new file mode 100644 index 0000000000..2e150ca7b8 --- /dev/null +++ b/.github/test-infra/aws/eks/data.tf @@ -0,0 +1,35 @@ +# Copyright 2024 Defense Unicorns +# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial + +# Common data sources +data "aws_caller_identity" "current" {} +data "aws_partition" "current" {} +data "aws_region" "current" {} + +# Use existing VPC and subnets +data "aws_vpc" "vpc" { + filter { + name = "tag:Name" + values = [var.vpc_name] + } +} + +data "aws_subnet" "eks_ci_subnet_b" { + vpc_id = data.aws_vpc.vpc.id + availability_zone = "${var.region}b" + + filter { + name = "tag:Name" + values = [var.subnet_name] + } +} + +data "aws_subnet" "eks_ci_subnet_c" { + vpc_id = data.aws_vpc.vpc.id + availability_zone = "${var.region}c" + + filter { + name = "tag:Name" + values = [var.subnet_name] + } +} diff --git a/.github/test-infra/aws/eks/main.tf b/.github/test-infra/aws/eks/main.tf index 4b93d0ec44..ea98a15fe0 100644 --- a/.github/test-infra/aws/eks/main.tf +++ b/.github/test-infra/aws/eks/main.tf @@ -1,25 +1,17 @@ # Copyright 2024 Defense Unicorns # SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial - -resource "random_id" "default" { - byte_length = 2 -} - -data "aws_eks_cluster" "existing" { - name = var.name -} - -data "aws_caller_identity" "current" {} - -data "aws_partition" "current" {} - -data "aws_region" "current" {} - locals { - oidc_url_without_protocol = substr(data.aws_eks_cluster.existing.identity[0].oidc[0].issuer, 8, -1) - oidc_arn = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.oidc_url_without_protocol}" - iam_role_permissions_boundary = var.use_permissions_boundary ? "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.permissions_boundary_name}" : null + # Combine subnet IDs for EKS + subnet_ids = [data.aws_subnet.eks_ci_subnet_b.id, data.aws_subnet.eks_ci_subnet_c.id] + + # Tags for resources + tags = { + Name = var.name + Environment = "ci" + PermissionsBoundary = var.permissions_boundary_name + } + # Bucket configurations for IRSA bucket_configurations = { for instance in var.bucket_configurations : instance.name => { @@ -29,18 +21,13 @@ locals { } } - kms_key_arns = module.generate_kms - + # IAM policies for IRSA iam_policies = { "loki" = resource.aws_iam_policy.loki_policy.arn "velero" = resource.aws_iam_policy.velero_policy.arn } } -resource "random_id" "unique_id" { - byte_length = 4 -} - module "generate_kms" { for_each = local.bucket_configurations source = "../modules/kms" @@ -51,6 +38,15 @@ module "generate_kms" { tags = { Deployment = "UDS Core ${each.value.name}" } + + # Explicit dependency on EKS cluster + depends_on = [ + module.eks + ] +} + +resource "random_id" "unique_id" { + byte_length = 4 } module "S3" { @@ -59,6 +55,11 @@ module "S3" { bucket_prefix = "${each.value.name}-" kms_key_arn = module.generate_kms[each.key].kms_key_arn irsa_role_arn = module.irsa[each.key].role_arn + + # Explicit dependency on KMS + depends_on = [ + module.generate_kms + ] } module "irsa" { @@ -66,17 +67,24 @@ module "irsa" { source = "../modules/irsa" name = each.value.name kubernetes_service_account = each.value.service_account - role_permissions_boundary_arn = local.iam_role_permissions_boundary + role_permissions_boundary_arn = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/${var.permissions_boundary_name}" account_id = data.aws_caller_identity.current.account_id current_partition = data.aws_partition.current.partition oidc_providers = { main = { - provider_arn = local.oidc_arn + provider_arn = module.eks.oidc_provider_arn namespace_service_accounts = [format("%s:%s", each.value.namespace, each.value.service_account)] } } role_policy_arns = tomap({ - "${each.key}" = local.iam_policies[each.key] + (each.key) = local.iam_policies[each.key] }) + + # Explicit dependency on EKS cluster + depends_on = [ + module.eks, + aws_iam_policy.loki_policy, + aws_iam_policy.velero_policy + ] } diff --git a/.github/test-infra/aws/eks/outputs.tf b/.github/test-infra/aws/eks/outputs.tf index f0e4e62245..b2840f50b7 100644 --- a/.github/test-infra/aws/eks/outputs.tf +++ b/.github/test-infra/aws/eks/outputs.tf @@ -6,30 +6,37 @@ output "aws_region" { } output "loki_irsa_role_arn" { + sensitive = true value = module.irsa["loki"].role_arn } output "loki_s3" { + sensitive = true value = module.S3["loki"] } output "loki_s3_bucket" { + sensitive = true value = module.S3["loki"].bucket_name } output "velero_irsa_role_arn" { + sensitive = true value = module.irsa["velero"].role_arn } output "velero_s3" { + sensitive = true value = module.S3["velero"] } output "velero_s3_bucket" { + sensitive = true value = module.S3["velero"].bucket_name } output "grafana_pg_host" { + sensitive = true description = "RDS Endpoint for Grafana" value = element(split(":", module.db.db_instance_endpoint), 0) } @@ -57,4 +64,4 @@ output "grafana_pg_password" { output "grafana_ha" { value = true -} \ No newline at end of file +} diff --git a/.github/test-infra/aws/eks/rds.tf b/.github/test-infra/aws/eks/rds.tf index 0f59141d3c..06f4b45153 100644 --- a/.github/test-infra/aws/eks/rds.tf +++ b/.github/test-infra/aws/eks/rds.tf @@ -22,7 +22,7 @@ module "db" { source = "terraform-aws-modules/rds/aws" version = "6.12.0" - identifier = "${var.db_name}-db" + identifier = "${var.name}-db" instance_use_identifier_prefix = true allocated_storage = var.db_allocated_storage @@ -34,14 +34,14 @@ module "db" { engine = "postgres" engine_version = var.db_engine_version major_engine_version = split(".", var.db_engine_version)[0] - family = "postgres15" + family = "postgres16" instance_class = var.db_instance_class db_name = var.db_name username = var.username port = var.db_port - subnet_ids = data.aws_subnets.subnets.ids + subnet_ids = data.aws_subnets.rds_subnets.ids create_db_subnet_group = true create_db_parameter_group = false manage_master_user_password = false @@ -55,7 +55,7 @@ module "db" { } resource "aws_security_group" "rds_sg" { - vpc_id = local.vpc_id + vpc_id = data.aws_vpc.rds_vpc.id egress { from_port = 0 @@ -75,20 +75,16 @@ resource "aws_vpc_security_group_ingress_rule" "rds_ingress" { to_port = 5432 } -data "aws_vpc" "vpc" { +data "aws_vpc" "rds_vpc" { filter { name = "tag:Name" - values = ["eksctl-${var.name}-cluster/VPC"] + values = [var.vpc_name] } } -data "aws_subnets" "subnets" { +data "aws_subnets" "rds_subnets" { filter { name = "vpc-id" - values = [data.aws_vpc.vpc.id] + values = [data.aws_vpc.rds_vpc.id] } } - -locals { - vpc_id = data.aws_vpc.vpc.id -} diff --git a/.github/test-infra/aws/eks/uds-config.tf b/.github/test-infra/aws/eks/uds-config.tf index 29f1963c85..7f87167a3e 100644 --- a/.github/test-infra/aws/eks/uds-config.tf +++ b/.github/test-infra/aws/eks/uds-config.tf @@ -28,6 +28,10 @@ resource "local_sensitive_file" "uds_config" { "grafana_pg_password" : random_password.db_password.result, "grafana_pg_user" : var.username } + "init" : { + # Workaround for Bottlerocket EBS issue - https://github.com/bottlerocket-os/bottlerocket/issues/2417 + "registry_hpa_enable" : false + } } }) } diff --git a/.github/test-infra/aws/eks/variables.tf b/.github/test-infra/aws/eks/variables.tf index 94bcb7e156..cb2741f009 100644 --- a/.github/test-infra/aws/eks/variables.tf +++ b/.github/test-infra/aws/eks/variables.tf @@ -12,29 +12,12 @@ variable "name" { } variable "permissions_boundary_name" { - description = "The name of the permissions boundary for IAM resources. This will be used for tagging and to build out the ARN." + description = "Name of the permissions boundary to use for IAM roles" type = string default = null } -variable "use_permissions_boundary" { - description = "Whether to use IAM permissions boundary for resources." - type = bool - default = true -} - -variable "key_owner_arns" { - description = "ARNS of KMS key owners, needed for use of key" - type = list(string) - default = [] -} - -# taken from zarf bb repo -variable "kms_key_deletion_window" { - description = "Waiting period for scheduled KMS Key deletion. Can be 7-30 days." - type = number - default = 7 -} +# Core Dependency Config variable "bucket_configurations" { type = map(object({ @@ -82,7 +65,7 @@ variable "username" { variable "db_engine_version" { description = "The Postgres engine version to use for the DB" type = string - default = "15.7" + default = "16.8" } variable "db_allocated_storage" { @@ -91,14 +74,58 @@ variable "db_allocated_storage" { default = 20 } -variable "db_storage_type" { - description = "The type of storage (e.g., gp2, io1)" - type = string - default = "gp2" -} - variable "db_instance_class" { description = "The class of RDS instance (e.g., db.t4g.large)" type = string default = "db.t4g.large" } + +# EKS Config + +variable "kubernetes_version" { + description = "Kubernetes version to use for the EKS cluster" + type = string + default = "1.31" +} + +variable "vpc_name" { + description = "Name of the VPC to use for the EKS cluster" + type = string + default = "uds-vpc" +} + +variable "subnet_name" { + type = string + description = "Name of subnet to use for testing. Can use a wildcard as long as it only matches one subnet per az." + default = "uds-vpc-public*" +} + +variable "instance_type" { + description = "Instance type to use for the EKS node group" + type = string + default = "m5.2xlarge" +} + +variable "node_group_min_size" { + description = "Minimum size of the EKS node group" + type = number + default = 3 +} + +variable "node_group_max_size" { + description = "Maximum size of the EKS node group" + type = number + default = 3 +} + +variable "node_group_desired_size" { + description = "Desired size of the EKS node group" + type = number + default = 3 +} + +variable "node_disk_size" { + description = "Disk size in GB for the EKS node group" + type = number + default = 150 +} diff --git a/.github/test-infra/aws/eks/versions.tf b/.github/test-infra/aws/eks/versions.tf index f125823896..344e3570aa 100644 --- a/.github/test-infra/aws/eks/versions.tf +++ b/.github/test-infra/aws/eks/versions.tf @@ -6,6 +6,7 @@ provider "aws" { default_tags { tags = { + run-id = var.name PermissionsBoundary = var.permissions_boundary_name } } @@ -24,5 +25,10 @@ terraform { source = "hashicorp/random" version = "3.7.2" } + + local = { + source = "hashicorp/local" + version = "2.5.2" + } } -} \ No newline at end of file +} diff --git a/.github/test-infra/aws/modules/irsa/versions.tf b/.github/test-infra/aws/modules/irsa/versions.tf index 76015e9fee..f8d664263e 100644 --- a/.github/test-infra/aws/modules/irsa/versions.tf +++ b/.github/test-infra/aws/modules/irsa/versions.tf @@ -3,12 +3,10 @@ terraform { required_version = ">= 1.8.0" - backend "s3" { - } required_providers { aws = { source = "hashicorp/aws" version = ">= 5.0" } } -} \ No newline at end of file +} diff --git a/.github/test-infra/aws/modules/kms/versions.tf b/.github/test-infra/aws/modules/kms/versions.tf index 76015e9fee..f8d664263e 100644 --- a/.github/test-infra/aws/modules/kms/versions.tf +++ b/.github/test-infra/aws/modules/kms/versions.tf @@ -3,12 +3,10 @@ terraform { required_version = ">= 1.8.0" - backend "s3" { - } required_providers { aws = { source = "hashicorp/aws" version = ">= 5.0" } } -} \ No newline at end of file +} diff --git a/.github/test-infra/aws/modules/s3/versions.tf b/.github/test-infra/aws/modules/s3/versions.tf index 76015e9fee..f8d664263e 100644 --- a/.github/test-infra/aws/modules/s3/versions.tf +++ b/.github/test-infra/aws/modules/s3/versions.tf @@ -3,12 +3,10 @@ terraform { required_version = ">= 1.8.0" - backend "s3" { - } required_providers { aws = { source = "hashicorp/aws" version = ">= 5.0" } } -} \ No newline at end of file +} diff --git a/.github/test-infra/aws/rke2/terraform.tfvars b/.github/test-infra/aws/rke2/terraform.tfvars index 53ff1f7dd3..e5cbc20a7e 100644 --- a/.github/test-infra/aws/rke2/terraform.tfvars +++ b/.github/test-infra/aws/rke2/terraform.tfvars @@ -4,4 +4,4 @@ os_distro = "rhel" # Need to allow in from internet for github runner to connect to node allowed_in_cidrs = ["0.0.0.0/0"] -rke2_version = "1.30" \ No newline at end of file +rke2_version = "1.31" diff --git a/.github/test-infra/aws/rke2/uds-config.tf b/.github/test-infra/aws/rke2/uds-config.tf index 1ea9dd2253..86d2ed3622 100644 --- a/.github/test-infra/aws/rke2/uds-config.tf +++ b/.github/test-infra/aws/rke2/uds-config.tf @@ -28,6 +28,10 @@ resource "local_sensitive_file" "uds_config" { "grafana_pg_password" : "\"\"", "grafana_pg_user" : "\"\"", } + "init" : { + # Disabled to prevent scaling timing issues with image pushes + "registry_hpa_enable" : false + } } }) } diff --git a/.github/test-infra/aws/rke2/variables.tf b/.github/test-infra/aws/rke2/variables.tf index 8236dd1e87..49d5ad7d65 100644 --- a/.github/test-infra/aws/rke2/variables.tf +++ b/.github/test-infra/aws/rke2/variables.tf @@ -14,7 +14,7 @@ variable "vpc_name" { variable "subnet_name" { type = string - description = "Name of subnet tobrew install libtool use for testing. Can use a wildcard as long as it only matches one subnet per az." + description = "Name of subnet to use for testing. Can use a wildcard as long as it only matches one subnet per az." default = "uds-vpc-public*" } diff --git a/.github/test-infra/azure/aks/uds-config.tf b/.github/test-infra/azure/aks/uds-config.tf index cafb69cc3b..33cdb48357 100644 --- a/.github/test-infra/azure/aks/uds-config.tf +++ b/.github/test-infra/azure/aks/uds-config.tf @@ -24,6 +24,10 @@ resource "local_sensitive_file" "uds_config" { "grafana_pg_password" : random_password.db_password.result, "grafana_pg_user" : var.username, } + "init" : { + # Disabled to prevent scaling timing issues with image pushes + "registry_hpa_enable" : false + } } }) } @@ -37,4 +41,4 @@ data "azurerm_kubernetes_cluster" "aks_cluster" { resource "local_sensitive_file" "kubeconfig" { filename = "/home/runner/.kube/config" content = data.azurerm_kubernetes_cluster.aks_cluster.kube_admin_config_raw -} \ No newline at end of file +} diff --git a/.github/test-infra/azure/aks/variables.tf b/.github/test-infra/azure/aks/variables.tf index 579ecc4fa5..d67b7e2135 100644 --- a/.github/test-infra/azure/aks/variables.tf +++ b/.github/test-infra/azure/aks/variables.tf @@ -37,7 +37,7 @@ variable "sku_tier" { variable "kubernetes_version" { description = "Specifies the AKS Kubernetes version" - default = "1.30" + default = "1.31" type = string } diff --git a/.github/workflows/test-aks.yaml b/.github/workflows/test-aks.yaml index 1251b85527..eb4f8e8698 100644 --- a/.github/workflows/test-aks.yaml +++ b/.github/workflows/test-aks.yaml @@ -5,7 +5,7 @@ name: AKS Test on: schedule: - - cron: '0 0 * * 0' # Every Sunday Midnight (UTC) / Saturday 5pm MT + - cron: "0 0 * * 0" # Every Sunday Midnight (UTC) / Saturday 5pm MT pull_request: # milestoned is added here as a workaround for release-please not triggering PR workflows (PRs should be added to a milestone to trigger the workflow). # labeled is added here to allow for "manual" triggering of CI on renovate PRs @@ -63,7 +63,7 @@ jobs: client-id: ${{ secrets.AZURE_GOV_CLIENT_ID }} tenant-id: ${{ secrets.AZURE_GOV_TENANT_ID }} subscription-id: ${{ secrets.AZURE_GOV_SUBSCRIPTION_ID }} - environment: 'AzureUSGovernment' + environment: "AzureUSGovernment" - name: Environment setup uses: ./.github/actions/setup @@ -80,7 +80,7 @@ jobs: tofu_wrapper: false - name: Create UDS Core Package - run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --set FLAVOR=${{ matrix.flavor }} + run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --with create_options="--skip-sbom" --set FLAVOR=${{ matrix.flavor }} - name: Create Core Bundle run: uds create .github/bundles/aks --confirm diff --git a/.github/workflows/test-eks.yaml b/.github/workflows/test-eks.yaml index 3247a08356..f555213372 100644 --- a/.github/workflows/test-eks.yaml +++ b/.github/workflows/test-eks.yaml @@ -5,7 +5,7 @@ name: EKS Test on: schedule: - - cron: '0 0 * * 0' # Every Sunday Midnight (UTC) / Saturday 5pm MT + - cron: "0 0 * * 0" # Every Sunday Midnight (UTC) / Saturday 5pm MT pull_request: # milestoned is added here as a workaround for release-please not triggering PR workflows (PRs should be added to a milestone to trigger the workflow). # labeled is added here to allow for "manual" triggering of CI on renovate PRs @@ -43,7 +43,6 @@ jobs: echo "UDS_STATE_KEY="tfstate/ci/install/${SHA:0:7}-eks-core-${{ matrix.flavor }}-aws.tfstate >> $GITHUB_ENV echo "TF_VAR_region=${UDS_REGION}" >> $GITHUB_ENV echo "TF_VAR_name=uds-ci-${{ matrix.flavor }}-${SHA:0:7}" >> $GITHUB_ENV - echo "TF_VAR_use_permissions_boundary=true" >> $GITHUB_ENV echo "TF_VAR_permissions_boundary_name=${UDS_PERMISSIONS_BOUNDARY_NAME}" >> $GITHUB_ENV - name: Checkout repository @@ -69,9 +68,6 @@ jobs: ghToken: ${{ secrets.GITHUB_TOKEN }} chainguardIdentity: ${{ secrets.CHAINGUARD_IDENTITY }} - - name: Install eksctl - run: uds run -f tasks/iac.yaml install-eksctl --no-progress - - name: Setup Tofu uses: opentofu/setup-opentofu@592200bd4b9bbf4772ace78f887668b1aee8f716 # v1.0.5 with: @@ -79,21 +75,21 @@ jobs: tofu_wrapper: false - name: Create UDS Core Package - run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --set FLAVOR=${{ matrix.flavor }} + run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --with create_options="--skip-sbom" --set FLAVOR=${{ matrix.flavor }} - name: Create Core Bundle - run: uds create .github/bundles/eks --confirm + run: uds create .github/bundles/eks --confirm -a amd64 - name: Create Cluster - run: uds run -f tasks/iac.yaml create-cluster --no-progress + run: uds run -f tasks/iac.yaml create-iac --no-progress --set K8S_DISTRO=eks --set CLOUD=aws timeout-minutes: 60 - - name: Create IAC - run: uds run -f tasks/iac.yaml create-iac --no-progress --set K8S_DISTRO=eks --set CLOUD=aws - timeout-minutes: 20 + - name: Get Kubeconfig + run: uds run -f tasks/iac.yaml eks-get-kubeconfig --no-progress + timeout-minutes: 5 - - name: Configure Cluster DNS - run: uds run -f tasks/utils.yaml eks-coredns-setup --no-progress + - name: Setup GP3 Storage Class + run: uds run -f tasks/utils.yaml eks-storageclass-setup --no-progress - name: Deploy Core Bundle env: @@ -102,7 +98,7 @@ jobs: timeout-minutes: 30 - name: Test UDS Core - run: uds run -f tasks/test.yaml uds-core-non-k3d --set EXCLUDED_PACKAGES="metrics-server" + run: uds run -f tasks/test.yaml uds-core-non-k3d - name: Debug Output if: ${{ always() }} @@ -121,14 +117,8 @@ jobs: timeout-minutes: 10 continue-on-error: true - - name: Remove IAC - if: always() - run: uds run -f tasks/iac.yaml destroy-iac --no-progress --set K8S_DISTRO=eks --set CLOUD=aws - timeout-minutes: 10 - continue-on-error: true - - name: Teardown EKS cluster if: always() - run: uds run -f tasks/iac.yaml destroy-cluster --no-progress --set K8S_DISTRO=eks + run: uds run -f tasks/iac.yaml destroy-iac --no-progress --set K8S_DISTRO=eks --set CLOUD=aws timeout-minutes: 30 continue-on-error: true diff --git a/.github/workflows/test-rke2.yaml b/.github/workflows/test-rke2.yaml index 51d79424a3..a015bf7fb1 100644 --- a/.github/workflows/test-rke2.yaml +++ b/.github/workflows/test-rke2.yaml @@ -5,7 +5,7 @@ name: RKE2 Test on: schedule: - - cron: '0 0 * * 0' # Every Sunday Midnight (UTC) / Saturday 5pm MT + - cron: "0 0 * * 0" # Every Sunday Midnight (UTC) / Saturday 5pm MT pull_request: # milestoned is added here as a workaround for release-please not triggering PR workflows (PRs should be added to a milestone to trigger the workflow). # labeled is added here to allow for "manual" triggering of CI on renovate PRs @@ -78,7 +78,7 @@ jobs: tofu_wrapper: false - name: Create UDS Core Package - run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --set FLAVOR=${{ matrix.flavor }} + run: ZARF_ARCHITECTURE=amd64 uds run -f tasks/create.yaml standard-package --no-progress --with create_options="--skip-sbom" --set FLAVOR=${{ matrix.flavor }} - name: Create Core Bundle run: uds create .github/bundles/rke2 --confirm @@ -118,8 +118,6 @@ jobs: - name: Remove UDS Core if: always() run: uds remove .github/bundles/rke2/uds-bundle-uds-core-rke2-*.tar.zst --confirm - env: - KUBECONFIG: ".github/test-infra/aws/rke2/rke2-config" timeout-minutes: 20 continue-on-error: true diff --git a/src/pepr/patches/index.ts b/src/pepr/patches/index.ts index b6eb6ca13b..ea5859fdc2 100644 --- a/src/pepr/patches/index.ts +++ b/src/pepr/patches/index.ts @@ -44,7 +44,6 @@ When(a.Service) /** * Mutate the Neuvector Enforcer DaemonSet to add a livenessProbe */ - When(a.DaemonSet) .IsCreatedOrUpdated() .InNamespace("neuvector") @@ -80,7 +79,6 @@ When(a.DaemonSet) * Mutate the Neuvector Controller Deployment to patch in new readinessProbe * See issue for reference: https://github.com/defenseunicorns/uds-core/issues/1446 */ - When(a.Deployment) .IsCreatedOrUpdated() .InNamespace("neuvector") diff --git a/tasks/iac.yaml b/tasks/iac.yaml index 85c744fac4..2afe4be7bc 100644 --- a/tasks/iac.yaml +++ b/tasks/iac.yaml @@ -17,83 +17,8 @@ variables: - name: RESOURCE_GROUP_NAME - name: STORAGE_ACCOUNT_NAME - name: CONTAINER_NAME - - name: AMI_ID - default: ami-068ab6ac1cec494e0 tasks: - - name: install-eksctl - actions: - - cmd: | - curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/v0.207.0/eksctl_Linux_amd64.tar.gz" | tar xz -C /tmp - sudo mv /tmp/eksctl /usr/local/bin - - - name: create-cluster - actions: - - cmd: | - cat < cluster-config.yaml - apiVersion: eksctl.io/v1alpha5 - kind: ClusterConfig - - metadata: - name: ${CLUSTER_NAME} - region: ${REGION} - version: "1.30" - tags: - PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME} - - iam: - withOIDC: true - serviceRolePermissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN} - - addons: - - name: vpc-cni - attachPolicyARNs: - # Commercial IAM Policy - # - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy - # Govcloud IAM policy - - arn:aws-us-gov:iam::aws:policy/AmazonEKS_CNI_Policy - permissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN} - tags: - PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME} - configurationValues: |- - enableNetworkPolicy: "true" - - - name: aws-ebs-csi-driver - attachPolicyARNs: - # Commercial IAM Policy - # - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy - # GovCloud IAM Policy - - arn:aws-us-gov:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy - configurationValues: |- - defaultStorageClass: - enabled: true - permissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN} - tags: - PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME} - - managedNodeGroups: - - name: ng-1 - instanceType: m5.2xlarge - desiredCapacity: 3 - volumeSize: 150 - tags: - PermissionsBoundary: ${PERMISSIONS_BOUNDARY_NAME} - iam: - instanceRolePermissionsBoundary: ${PERMISSIONS_BOUNDARY_ARN} - withAddonPolicies: - cloudWatch: true - ebs: true - cloudWatch: - clusterLogging: - enableTypes: ["*"] - logRetentionInDays: 1 - EOF - - - cmd: eksctl create cluster --dry-run -f cluster-config.yaml - - cmd: sleep 5 - - cmd: eksctl create cluster -f cluster-config.yaml - - cmd: eksctl utils write-kubeconfig -c ${CLUSTER_NAME} - - name: rke2-get-kubeconfig actions: - cmd: chmod +x ./scripts/get-kubeconfig.sh && ./scripts/get-kubeconfig.sh @@ -144,10 +69,6 @@ tasks: dir: .github/test-infra/aws/rke2/ maxTotalSeconds: 600 - - name: destroy-cluster - actions: - - cmd: eksctl delete cluster -f cluster-config.yaml --disable-nodegroup-eviction --wait - - name: create-iac actions: - task: apply-tofu @@ -178,3 +99,8 @@ tasks: dir: .github/test-infra/${CLOUD}/${K8S_DISTRO} - cmd: tofu apply -auto-approve dir: .github/test-infra/${CLOUD}/${K8S_DISTRO} + + - name: eks-get-kubeconfig + actions: + - cmd: | + aws eks update-kubeconfig --name ${CLUSTER_NAME} --region ${REGION} diff --git a/tasks/utils.yaml b/tasks/utils.yaml index 526f218951..854751b438 100644 --- a/tasks/utils.yaml +++ b/tasks/utils.yaml @@ -78,44 +78,26 @@ tasks: - protocol: TCP port: 9153 EOF - - name: eks-coredns-setup + - name: eks-storageclass-setup actions: - - description: Setup Custom ConfigMap for Core DNS + - description: Setup GP3 Storage Class cmd: | uds zarf tools kubectl apply -f - </dev/null) if [ -z $IP_ADDR ]; then HOSTNAME=$(uds zarf tools kubectl get service -n istio-admin-gateway admin-ingressgateway -o=jsonpath='{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null) - IP_ADDR=$(dig +short $HOSTNAME) + IP_ADDR=$(dig +short $HOSTNAME | head -n1) fi; echo $IP_ADDR mute: true setVariables: @@ -135,7 +117,7 @@ tasks: IP_ADDR=$(uds zarf tools kubectl get service -n istio-tenant-gateway tenant-ingressgateway -o=jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null) if [ -z $IP_ADDR ]; then HOSTNAME=$(uds zarf tools kubectl get service -n istio-tenant-gateway tenant-ingressgateway -o=jsonpath='{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null) - IP_ADDR=$(dig +short $HOSTNAME) + IP_ADDR=$(dig +short $HOSTNAME | head -n1) fi; echo $IP_ADDR mute: true setVariables: @@ -147,4 +129,4 @@ tasks: - description: Adds Cluster LoadBalancer IP Addresses to match appropriate hosts names in /etc/hosts mute: true cmd: | - echo "$ADMIN_GW_IP keycloak.admin.uds.dev neuvector.admin.uds.dev grafana.admin.uds.dev demo.admin.uds.dev\n$TENANT_GW_IP sso.uds.dev demo-8080.uds.dev demo-8081.uds.dev protected.uds.dev" | sudo tee --append /etc/hosts + echo "$ADMIN_GW_IP keycloak.admin.uds.dev neuvector.admin.uds.dev grafana.admin.uds.dev demo.admin.uds.dev\n$TENANT_GW_IP sso.uds.dev demo-8080.uds.dev demo-8081.uds.dev protected.uds.dev" | sudo tee -a /etc/hosts