diff --git a/AGENTS.md b/AGENTS.md index 48bf8035514..a7e20f1716d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -73,6 +73,27 @@ bin/hypershift install --development # Install in development mode bin/hypershift-operator run # Run operator locally ``` +## Local Development Environment + +For detailed instructions on local development workflows including: +- Building custom operator images +- Installing HyperShift with custom images +- Creating and managing hosted clusters +- Running E2E tests +- Iteration workflows + +See the comprehensive guides in: +- **[HACKING.md](./HACKING.md)** - General development how-to guides +- **[.claude/skills/dev/](./.claude/skills/dev/)** - Step-by-step development skills for specific tasks: + - `build-ho-image` - Building HyperShift operator images + - `build-cpo-image` - Building control plane operator images + - `install-ho-aws` - Installing HyperShift on AWS + - `create-hc-aws` - Creating hosted clusters + - `e2e-run-aws` - Running and iterating on E2E tests + - `destroy-hc-aws` - Cleaning up hosted clusters + +These skills include environment setup, prerequisites, troubleshooting, and iteration workflows. + ## Testing Strategy ### Unit Tests diff --git a/cmd/infra/aws/iam.go b/cmd/infra/aws/iam.go index 2100dfa0f4b..2e4114c2d7b 100644 --- a/cmd/infra/aws/iam.go +++ b/cmd/infra/aws/iam.go @@ -933,20 +933,54 @@ func (o *CreateIAMOptions) CreateOIDCResources(ctx context.Context, iamClient aw ingressRoleName := output.Roles.IngressARN[strings.LastIndex(output.Roles.IngressARN, "/")+1:] // Cloud Controller Manager's (CCM) managed policy needs to be updated on ROSA to allow new permissions downstream controllers to work. - // The permissions are: - // - elasticloadbalancing:DescribeTargetGroupAttributes - // - elasticloadbalancing:ModifyTargetGroupAttributes + // The permissions are required for NLB with managed security groups feature. 
// + // References for hairpinning support: // https://issues.redhat.com/browse/OCPBUGS-65885 - // - // This inline policy must be removed when the following issue is resolved: // https://issues.redhat.com/browse/SREP-2895 // https://redhat-internal.slack.com/archives/C03SZLX3A10/p1765396356482459 + // + // References for managed security group support: + // https://github.com/kubernetes/cloud-provider-aws/pull/1158 + // https://issues.redhat.com/browse/SREP-3643 + // https://issues.redhat.com/browse/OCPSTRAT-1553 ccmPolicyStatement := `{ "Effect": "Allow", "Action": [ + "elasticloadbalancing:CreateLoadBalancer", + "elasticloadbalancing:DeleteLoadBalancer", + "elasticloadbalancing:DescribeLoadBalancers", + "elasticloadbalancing:ModifyLoadBalancerAttributes", + "elasticloadbalancing:CreateTargetGroup", + "elasticloadbalancing:DeleteTargetGroup", + "elasticloadbalancing:DescribeTargetGroups", "elasticloadbalancing:DescribeTargetGroupAttributes", - "elasticloadbalancing:ModifyTargetGroupAttributes" + "elasticloadbalancing:ModifyTargetGroupAttributes", + "elasticloadbalancing:DescribeTargetHealth", + "elasticloadbalancing:RegisterTargets", + "elasticloadbalancing:DeregisterTargets", + "elasticloadbalancing:CreateListener", + "elasticloadbalancing:DeleteListener", + "elasticloadbalancing:DescribeListeners", + "elasticloadbalancing:SetSecurityGroups", + "elasticloadbalancing:DescribeTags", + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateSecurityGroup", + "ec2:DeleteSecurityGroup", + "ec2:DescribeSecurityGroups", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupIngress", + "ec2:DescribeSubnets", + "ec2:DescribeVpcs", + "ec2:DescribeInstances", + "ec2:CreateTags" ], "Resource": "*" }` diff --git 
a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml index 8832cc37ae0..1eab657951e 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml @@ -1,7 +1,7 @@ apiVersion: v1 data: aws.conf: "[Global]\nZone = \nVPC = \nKubernetesClusterID = \nSubnetID = \nClusterServiceLoadBalancerHealthProbeMode - = Shared" + = Shared\nNLBSecurityGroupMode = Managed" kind: ConfigMap metadata: name: aws-cloud-config diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml index d2f4684687b..965860552e8 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/GCP/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml @@ -25,7 +25,7 @@ spec: metadata: annotations: cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes: cloud-token,tmp-dir - component.hypershift.openshift.io/config-hash: 6cd2cf9e + component.hypershift.openshift.io/config-hash: 
b7e59b60 hypershift.openshift.io/release-image: quay.io/openshift-release-dev/ocp-release:4.16.10-x86_64 labels: app: cloud-controller-manager diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml index 8832cc37ae0..1eab657951e 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_config_configmap.yaml @@ -1,7 +1,7 @@ apiVersion: v1 data: aws.conf: "[Global]\nZone = \nVPC = \nKubernetesClusterID = \nSubnetID = \nClusterServiceLoadBalancerHealthProbeMode - = Shared" + = Shared\nNLBSecurityGroupMode = Managed" kind: ConfigMap metadata: name: aws-cloud-config diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml index ec4d646c01b..6729427577f 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml +++ 
b/control-plane-operator/controllers/hostedcontrolplane/testdata/aws-cloud-controller-manager/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_aws_cloud_controller_manager_deployment.yaml @@ -25,7 +25,7 @@ spec: metadata: annotations: cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes: cloud-token,tmp-dir - component.hypershift.openshift.io/config-hash: 6cd2cf9e + component.hypershift.openshift.io/config-hash: b7e59b60 hypershift.openshift.io/release-image: quay.io/openshift-release-dev/ocp-release:4.16.10-x86_64 labels: app: cloud-controller-manager diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config.go b/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config.go index 1e18e14b3a7..775ba0ce676 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config.go +++ b/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config.go @@ -4,7 +4,9 @@ import ( "fmt" "strconv" + configv1 "github.com/openshift/api/config/v1" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/control-plane-operator/featuregates" component "github.com/openshift/hypershift/support/controlplane-component" corev1 "k8s.io/api/core/v1" @@ -60,6 +62,18 @@ func adaptConfig(cpContext component.WorkloadContext, cm *corev1.ConfigMap) erro baseConfig += fmt.Sprintf("\nClusterServiceSharedLoadBalancerHealthProbePort = %s", portStr) } + // Add NLBSecurityGroupMode when the AWSServiceLBNetworkSecurityGroup feature gate is enabled for this cluster. + // Check the feature gate based on the cluster's configured feature gate spec, not the global operator feature set.
+ var featureGateSpec *configv1.FeatureGateSpec + if cpContext.HCP.Spec.Configuration != nil { + featureGateSpec = cpContext.HCP.Spec.Configuration.FeatureGate + } + + isEnabled := featuregates.IsFeatureEnabledInFeatureGateSpec(featureGateSpec, featuregates.AWSServiceLBNetworkSecurityGroup) + if isEnabled { + baseConfig += "\nNLBSecurityGroupMode = Managed" + } + + cm.Data[configKey] = baseConfig return nil } diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config_test.go b/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config_test.go index 95d8d082934..d26143a5945 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config_test.go +++ b/control-plane-operator/controllers/hostedcontrolplane/v2/cloud_controller_manager/aws/config_test.go @@ -6,11 +6,14 @@ import ( hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" assets "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/v2/assets" + "github.com/openshift/hypershift/control-plane-operator/featuregates" "github.com/openshift/hypershift/control-plane-operator/hostedclusterconfigoperator/api" component "github.com/openshift/hypershift/support/controlplane-component" "github.com/openshift/hypershift/support/testutil" "github.com/openshift/hypershift/support/util" + + configv1 "github.com/openshift/api/config/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" @@ -101,3 +104,95 @@ func TestConfigErrorStates(t *testing.T) { }) } } + +func TestConfigWithCustomAnnotations(t *testing.T) { + tests := []struct { + name string + hcp *hyperv1.HostedControlPlane + configContains string + configNotContains string + }{ + { + name: "When Default feature set is set it should not have custom
configuration", + hcp: func() *hyperv1.HostedControlPlane { + hcp := newTestHCP(map[string]string{}) + hcp.Spec.Configuration = &hyperv1.ClusterConfiguration{ + FeatureGate: &configv1.FeatureGateSpec{ + FeatureGateSelection: configv1.FeatureGateSelection{ + FeatureSet: configv1.Default, + }, + }, + } + return hcp + }(), + configContains: "", + configNotContains: "NLBSecurityGroupMode", + }, + { + name: "With TechPreviewNoUpgrade feature set it should have NLBSecurityGroupMode set to Managed in the config", + hcp: func() *hyperv1.HostedControlPlane { + hcp := newTestHCP(map[string]string{}) + hcp.Spec.Configuration = &hyperv1.ClusterConfiguration{ + FeatureGate: &configv1.FeatureGateSpec{ + FeatureGateSelection: configv1.FeatureGateSelection{ + FeatureSet: configv1.TechPreviewNoUpgrade, + }, + }, + } + return hcp + }(), + configContains: "NLBSecurityGroupMode = Managed", + }, + { + name: "With CustomNoUpgrade feature set it should have NLBSecurityGroupMode set to Managed in the config when the feature gate is enabled", + hcp: func() *hyperv1.HostedControlPlane { + hcp := newTestHCP(map[string]string{}) + hcp.Spec.Configuration = &hyperv1.ClusterConfiguration{ + FeatureGate: &configv1.FeatureGateSpec{ + FeatureGateSelection: configv1.FeatureGateSelection{ + FeatureSet: configv1.CustomNoUpgrade, + CustomNoUpgrade: &configv1.CustomFeatureGates{ + Enabled: []configv1.FeatureGateName{ + "AWSServiceLBNetworkSecurityGroup", + }, + }, + }, + }, + } + return hcp + }(), + configContains: "NLBSecurityGroupMode = Managed", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Ensure global feature gate is set to Default to verify that adaptConfig + // reads the feature set from HCP.Spec.Configuration, not the global gate. + // This proves the fix for the issue where the global gate was incorrectly + // used instead of the per-cluster configuration. 
+ featuregates.ConfigureFeatureSet(string(configv1.Default)) + + cm := &corev1.ConfigMap{} + _, _, err := assets.LoadManifestInto(ComponentName, "config.yaml", cm) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + cpContext := component.WorkloadContext{ + HCP: tt.hcp, + } + err = adaptConfig(cpContext, cm) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Verify expected content is present (if specified) + if tt.configContains != "" && !strings.Contains(cm.Data[configKey], tt.configContains) { + t.Fatalf("expected config to contain %q, got: %s", tt.configContains, cm.Data[configKey]) + } + // Verify unexpected content is absent (if specified) + if tt.configNotContains != "" && strings.Contains(cm.Data[configKey], tt.configNotContains) { + t.Fatalf("expected config to NOT contain %q, got: %s", tt.configNotContains, cm.Data[configKey]) + } + }) + } +} diff --git a/control-plane-operator/featuregates/featuregates.go b/control-plane-operator/featuregates/featuregates.go index 7312da65682..015da2f44ce 100644 --- a/control-plane-operator/featuregates/featuregates.go +++ b/control-plane-operator/featuregates/featuregates.go @@ -12,6 +12,7 @@ import ( const ( ExternalOIDCWithUIDAndExtraClaimMappings featuregate.Feature = "ExternalOIDCWithUIDAndExtraClaimMappings" ExternalOIDCWithUpstreamParity featuregate.Feature = "ExternalOIDCWithUpstreamParity" + AWSServiceLBNetworkSecurityGroup featuregate.Feature = "AWSServiceLBNetworkSecurityGroup" ) // Initialize new features here @@ -20,12 +21,15 @@ var ( externalOIDCWithUIDAndExtraClaimMappingsFeature = featuregates.NewFeature(ExternalOIDCWithUIDAndExtraClaimMappings, featuregates.WithEnableForFeatureSets(configv1.TechPreviewNoUpgrade, configv1.Default)) externalOIDCWithUpstreamParityFeature = featuregates.NewFeature(ExternalOIDCWithUpstreamParity, featuregates.WithEnableForFeatureSets(configv1.TechPreviewNoUpgrade)) + awsServiceLBNetworkSecurityGroupFeature = 
featuregates.NewFeature(AWSServiceLBNetworkSecurityGroup, featuregates.WithEnableForFeatureSets(configv1.TechPreviewNoUpgrade)) ) func init() { // Add featuregates here allFeatures.AddFeature(externalOIDCWithUIDAndExtraClaimMappingsFeature) allFeatures.AddFeature(externalOIDCWithUpstreamParityFeature) + allFeatures.AddFeature(awsServiceLBNetworkSecurityGroupFeature) + // Default to configuring the Default featureset ConfigureFeatureSet(string(configv1.Default)) } @@ -55,3 +59,57 @@ func ConfigureFeatureSet(featureSet string) { globalGate = featureGate } + +// IsFeatureEnabledInFeatureGateSpec checks if a feature gate is enabled based on a FeatureGateSpec. +// This allows checking feature gates for a given configuration without changing the global gate. +// It handles both fixed feature sets (Default, TechPreviewNoUpgrade, DevPreviewNoUpgrade) and +// custom feature sets (CustomNoUpgrade) where features are explicitly enabled/disabled. +// If the feature gate spec is nil, it falls back to checking the global gate. 
+func IsFeatureEnabledInFeatureGateSpec(featureGateSpec *configv1.FeatureGateSpec, feature featuregate.Feature) bool { + if featureGateSpec == nil { + // No feature gate configuration, fall back to global gate + return globalGate.Enabled(feature) + } + + featureSet := featureGateSpec.FeatureSet + + // Handle CustomNoUpgrade feature sets by checking the explicit enabled/disabled lists + if featureSet == configv1.CustomNoUpgrade { + if featureGateSpec.CustomNoUpgrade == nil { + // CustomNoUpgrade without custom configuration, fall back to global gate + return globalGate.Enabled(feature) + } + + featureName := configv1.FeatureGateName(feature) + + // Check if explicitly disabled + for _, disabled := range featureGateSpec.CustomNoUpgrade.Disabled { + if disabled == featureName { + return false + } + } + + // Check if explicitly enabled + for _, enabled := range featureGateSpec.CustomNoUpgrade.Enabled { + if enabled == featureName { + return true + } + } + + // Not in either list, fall back to the feature's default for Default feature set + featureGate, err := allFeatures.FeatureGatesForFeatureSet(configv1.Default) + if err != nil { + return globalGate.Enabled(feature) + } + return featureGate.Enabled(feature) + } + + // Handle fixed feature sets (Default, TechPreviewNoUpgrade, DevPreviewNoUpgrade) + featureGate, err := allFeatures.FeatureGatesForFeatureSet(featureSet) + if err != nil { + // If there's an error (e.g., unknown feature set), fall back to checking the global gate + return globalGate.Enabled(feature) + } + + return featureGate.Enabled(feature) +} diff --git a/test/e2e/create_cluster_test.go b/test/e2e/create_cluster_test.go index 1279caf34a3..5890d4159db 100644 --- a/test/e2e/create_cluster_test.go +++ b/test/e2e/create_cluster_test.go @@ -14,8 +14,7 @@ import ( "time" . 
"github.com/onsi/gomega" - configv1 "github.com/openshift/api/config/v1" - operatorv1 "github.com/openshift/api/operator/v1" + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" "github.com/openshift/hypershift/api/util/ipnet" "github.com/openshift/hypershift/hypershift-operator/controllers/manifests" @@ -24,12 +23,17 @@ import ( e2eutil "github.com/openshift/hypershift/test/e2e/util" "github.com/openshift/hypershift/test/integration" integrationframework "github.com/openshift/hypershift/test/integration/framework" + + configv1 "github.com/openshift/api/config/v1" + operatorv1 "github.com/openshift/api/operator/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/clientcmd" "k8s.io/utils/ptr" + crclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -427,16 +431,6 @@ func TestOnCreateAPIUX(t *testing.T) { }, expectedErrorSubstring: "", }, - { - name: "when GCP endpointAccess is PublicAndPrivate it should pass", - mutateInput: func(hc *hyperv1.HostedCluster) { - hc.Spec.Platform.Type = hyperv1.GCPPlatform - spec := validGCPPlatformSpec() - spec.EndpointAccess = hyperv1.GCPEndpointAccessPublicAndPrivate - hc.Spec.Platform.GCP = spec - }, - expectedErrorSubstring: "", - }, { name: "when GCP resource names contain hyphens correctly it should pass", mutateInput: func(hc *hyperv1.HostedCluster) { @@ -2537,6 +2531,8 @@ func TestCreateCluster(t *testing.T) { if !e2eutil.IsLessThan(e2eutil.Version418) { clusterOpts.FeatureSet = string(configv1.TechPreviewNoUpgrade) } + // Configure feature gates globally so tests can check if features are enabled + //featuregates.ConfigureFeatureSet(string(clusterOpts.FeatureSet)) if globalOpts.Platform == hyperv1.AzurePlatform || globalOpts.Platform == hyperv1.AWSPlatform { // Configure Ingress Operator with custom endpointPublishingStrategy before cluster creation @@ -2617,6 +2613,14 @@ func 
TestCreateCluster(t *testing.T) { // ensure Ingress Operator configuration is properly applied e2eutil.EnsureIngressOperatorConfiguration(t, ctx, mgtClient, guestClient, hostedCluster) } + + e2eutil.EnsureAWSCCMWithCustomizations(t, ctx, &e2eutil.E2eTestConfig{ + MgtClient: mgtClient, + GuestClient: guestClient, + HostedCluster: hostedCluster, + AWSCredsFile: clusterOpts.AWSPlatform.Credentials.AWSCredentialsFile, + Platform: globalOpts.Platform, + }) }).WithAssetReader(content.ReadFile). Execute(&clusterOpts, globalOpts.Platform, globalOpts.ArtifactDir, "create-cluster", globalOpts.ServiceAccountSigningKey) } @@ -2807,7 +2811,11 @@ func TestCreateClusterCustomConfig(t *testing.T) { e2eutil.NewHypershiftTest(t, ctx, func(t *testing.T, g Gomega, mgtClient crclient.Client, hostedCluster *hyperv1.HostedCluster) { switch globalOpts.Platform { case hyperv1.AWSPlatform: + g.Expect(hostedCluster.Spec.SecretEncryption).ToNot(BeNil(), "SecretEncryption must be set") + g.Expect(hostedCluster.Spec.SecretEncryption.KMS).ToNot(BeNil(), "SecretEncryption.KMS must be set") + g.Expect(hostedCluster.Spec.SecretEncryption.KMS.AWS).ToNot(BeNil(), "KMS.AWS must be set") g.Expect(hostedCluster.Spec.SecretEncryption.KMS.AWS.ActiveKey.ARN).To(Equal(*kmsKeyArn)) + g.Expect(hostedCluster.Spec.SecretEncryption.KMS.AWS.Auth).ToNot(BeNil(), "KMS.AWS.Auth must be set") g.Expect(hostedCluster.Spec.SecretEncryption.KMS.AWS.Auth.AWSKMSRoleARN).ToNot(BeEmpty()) case hyperv1.AzurePlatform: g.Expect(hostedCluster.Spec.SecretEncryption).ToNot(BeNil(), "SecretEncryption must be set") diff --git a/test/e2e/util/aws_ccm.go b/test/e2e/util/aws_ccm.go new file mode 100644 index 00000000000..8e392754775 --- /dev/null +++ b/test/e2e/util/aws_ccm.go @@ -0,0 +1,294 @@ +package util + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + awsutil "github.com/openshift/hypershift/cmd/infra/aws/util" + 
"github.com/openshift/hypershift/control-plane-operator/featuregates" + + "github.com/aws/aws-sdk-go-v2/aws" + elbv2 "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + crclient "sigs.k8s.io/controller-runtime/pkg/client" + + gomega "github.com/onsi/gomega" +) + +// E2eTestConfig is a configuration struct for AWS CCM tests. +type E2eTestConfig struct { + MgtClient crclient.Client + GuestClient crclient.Client + HostedCluster *hyperv1.HostedCluster + AWSCredsFile string + Platform hyperv1.PlatformType +} + +func EnsureAWSCCMWithCustomizations(t *testing.T, ctx context.Context, cfg *E2eTestConfig) { + t.Run("AWSCCMWithCustomizations", func(t *testing.T) { + ensureAWSCCMWithCustomizations(t, ctx, cfg) + }) +} + +// EnsureAWSCCMWithCustomizations implements tests that exercise AWS CCM controller for critical features. +// This test is only supported on platform AWS, as well runs only when the feature gate AWSServiceLBNetworkSecurityGroup is enabled. +// A hosted cluster with TechPreviewNoUpgrade feature set is supported. +// It must skip tests not enabled in the feature set. 
+func ensureAWSCCMWithCustomizations(t *testing.T, ctx context.Context, cfg *E2eTestConfig) { + validate_extractLoadBalancerNameFromHostname(t) + + AtLeast(t, Version421) + if cfg.Platform != hyperv1.AWSPlatform { + t.Skip("test only supported on platform AWS") + } + + // Check if the feature is enabled in the feature set + featureGateSpec := cfg.HostedCluster.Spec.Configuration.FeatureGate + if !featuregates.IsFeatureEnabledInFeatureGateSpec(featureGateSpec, featuregates.AWSServiceLBNetworkSecurityGroup) { + t.Logf("Feature gate is not enabled in the feature set: %s", featureGateSpec.FeatureSet) + t.Skipf("Skipping test: feature gate is not enabled in the feature set: %s", featureGateSpec.FeatureSet) + } + + // Test case: Validate managed security groups in TechPreviewNoUpgrade feature set + t.Run("When AWSServiceLBNetworkSecurityGroup is enabled it must have config NLBSecurityGroupMode=Managed entry in cloud-config configmap", func(t *testing.T) { + g := gomega.NewWithT(t) + + t.Logf("Validating aws-cloud-config ConfigMap contains entry NLBSecurityGroupMode=Managed") + + // The control plane namespace is {namespace}-{name} + controlPlaneNamespace := fmt.Sprintf("%s-%s", cfg.HostedCluster.Namespace, cfg.HostedCluster.Name) + t.Logf("Using control plane namespace: %s", controlPlaneNamespace) + + // Ensure the configuration is present when the feature gate is enabled + EventuallyObject(t, ctx, "NLBSecurityGroupMode = Managed entry exists in aws-cloud-config ConfigMap", + func(ctx context.Context) (*corev1.ConfigMap, error) { + cm := &corev1.ConfigMap{} + err := cfg.MgtClient.Get(ctx, crclient.ObjectKey{ + Namespace: controlPlaneNamespace, + Name: "aws-cloud-config", + }, cm) + return cm, err + }, + []Predicate[*corev1.ConfigMap]{func(cm *corev1.ConfigMap) (done bool, reasons string, err error) { + awsConf, exists := cm.Data["aws.conf"] + if !exists { + return false, "aws.conf key not found in ConfigMap", nil + } + + t.Logf("verifying NLBSecurityGroupMode is present 
in cloud config") + g.Expect(awsConf).To(gomega.ContainSubstring("NLBSecurityGroupMode"), + "NLBSecurityGroupMode must be present in cloud-config when feature gate is enabled") + + t.Logf("verifying NLBSecurityGroupMode is set to Managed") + g.Expect(awsConf).To(gomega.MatchRegexp(`NLBSecurityGroupMode\s*=\s*Managed`), + "NLBSecurityGroupMode must be set to 'Managed' in aws-config when feature gate is enabled") + + t.Logf("Successfully validated cloud-config contains NLBSecurityGroupMode = Managed") + + return true, "Successfully validated aws-config", nil + }, + }, + WithTimeout(2*time.Minute), + ) + }) + + // Test case: Create custom service type NLB in the hosted cluster, the NLB resource must + // have a security group attached to it + // Note: this test must run only when the feature gate AWSServiceLBNetworkSecurityGroup is enabled. + t.Run("When AWSServiceLBNetworkSecurityGroup is enabled it must create a LoadBalancer NLB with managed security group attached", func(t *testing.T) { + g := gomega.NewWithT(t) + + // Create a test namespace in the guest cluster + testNS := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ccm-nlb-sg", + }, + } + t.Logf("Creating test namespace %s in guest cluster", testNS.Name) + + err := cfg.GuestClient.Create(ctx, testNS) + g.Expect(err).NotTo(gomega.HaveOccurred(), "failed to create test namespace") + defer func() { + t.Logf("Cleaning up test namespace %s", testNS.Name) + _ = cfg.GuestClient.Delete(ctx, testNS) + }() + + // Create a LoadBalancer service + testSvc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ccm-nlb-sg-svc", + Namespace: testNS.Name, + Annotations: map[string]string{ + "service.beta.kubernetes.io/aws-load-balancer-type": "nlb", + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Selector: map[string]string{ + "app": "test-ccm-nlb-sg", + }, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + TargetPort: intstr.FromInt(8080), + 
Protocol: corev1.ProtocolTCP, + }, + }, + }, + } + t.Logf("Creating LoadBalancer service %s/%s", testSvc.Namespace, testSvc.Name) + err = cfg.GuestClient.Create(ctx, testSvc) + g.Expect(err).NotTo(gomega.HaveOccurred(), "failed to create LoadBalancer service") + + // Wait for the LoadBalancer to be provisioned and get hostname + var lbHostname string + EventuallyObject(t, ctx, "LoadBalancer service to have ingress hostname", + func(ctx context.Context) (*corev1.Service, error) { + svc := &corev1.Service{} + err := cfg.GuestClient.Get(ctx, crclient.ObjectKey{ + Namespace: testNS.Name, + Name: testSvc.Name, + }, svc) + return svc, err + }, + []Predicate[*corev1.Service]{ + func(svc *corev1.Service) (done bool, reasons string, err error) { + if len(svc.Status.LoadBalancer.Ingress) == 0 { + return false, "LoadBalancer ingress list is empty", nil + } + if svc.Status.LoadBalancer.Ingress[0].Hostname == "" { + return false, "LoadBalancer hostname is empty", nil + } + lbHostname = svc.Status.LoadBalancer.Ingress[0].Hostname + return true, fmt.Sprintf("LoadBalancer hostname is %s", lbHostname), nil + }, + }, + WithTimeout(5*time.Minute), + ) + t.Logf("LoadBalancer provisioned with hostname: %s", lbHostname) + + lbName := extractLoadBalancerNameFromHostname(lbHostname) + g.Expect(lbName).NotTo(gomega.BeEmpty(), "load balancer name should be extracted from hostname") + t.Logf("Extracted load balancer name: %s", lbName) + + t.Logf("Verifying load balancer has security groups using AWS SDK") + awsSession := awsutil.NewSession(ctx, "e2e-ccm-nlb-sg", cfg.AWSCredsFile, "", "", cfg.HostedCluster.Spec.Platform.AWS.Region) + g.Expect(awsSession).NotTo(gomega.BeNil(), "failed to create AWS session") + + awsConfig := awsutil.NewConfig() + g.Expect(awsConfig).NotTo(gomega.BeNil(), "failed to create AWS config") + + elbv2Client := elbv2.NewFromConfig(*awsSession, func(o *elbv2.Options) { + o.Retryer = awsConfig() + }) + g.Expect(elbv2Client).NotTo(gomega.BeNil(), "failed to create ELBv2 
client") + + describeLBInput := &elbv2.DescribeLoadBalancersInput{ + Names: []string{lbName}, + } + + // Wait for the load balancer to exist and become active before validating attributes like SecurityGroups. + // This avoids flakes where the Service is created but the LB is still provisioning. + t.Logf("Waiting for load balancer %q to become available (up to ~3 minutes)", lbName) + waiter := elbv2.NewLoadBalancerAvailableWaiter(elbv2Client, func(o *elbv2.LoadBalancerAvailableWaiterOptions) { + o.MinDelay = 5 * time.Second + o.MaxDelay = 30 * time.Second + }) + err = waiter.Wait(ctx, describeLBInput, 3*time.Minute) + g.Expect(err).NotTo(gomega.HaveOccurred(), "load balancer did not become available in time") + + // Describe the load balancer + t.Logf("Describing load balancer to check for security groups") + + describeLBOutput, err := elbv2Client.DescribeLoadBalancers(ctx, describeLBInput) + g.Expect(err).NotTo(gomega.HaveOccurred(), "failed to describe load balancer") + g.Expect(len(describeLBOutput.LoadBalancers)).To(gomega.BeNumerically(">", 0), "no load balancers found with name %s", lbName) + + lb := describeLBOutput.LoadBalancers[0] + t.Logf("Load balancer ARN: %s", aws.ToString(lb.LoadBalancerArn)) + t.Logf("Load balancer Type: %s", string(lb.Type)) + t.Logf("Load balancer Security Groups: %v", lb.SecurityGroups) + + // Verify security groups are attached + g.Expect(len(lb.SecurityGroups)).To(gomega.BeNumerically(">", 0), "load balancer should have security groups attached when NLBSecurityGroupMode = Managed") + + t.Logf("Successfully validated that load balancer has %d security group(s) attached", len(lb.SecurityGroups)) + for i, sg := range lb.SecurityGroups { + t.Logf(" Security Group %d: %s", i+1, sg) + } + }) +} + +// extractLoadBalancerNameFromHostname extracts the load balancer name from the DNS hostname. +// The hostname is in the format of -.elb..amazonaws.com. +// The function drops only the last hyphen segment. 
+// Example: +// - Input: "e2e-v7-fnt8p-ext-9a316db0952d7e14.elb.us-east-1.amazonaws.com" +// - Output: "e2e-v7-fnt8p-ext" +// - Input: "af1c7bcc09ce1420db0292d91f0dad1f-f4ad6ce6794c3afd.elb.us-east-1.amazonaws.com" +// - Output: "af1c7bcc09ce1420db0292d91f0dad1f" +// - Input: "a7f9d8c870a2b44c39d9565e2ec22e81-1194117244.us-east-1.elb.amazonaws.com" +// - Output: "a7f9d8c870a2b44c39d9565e2ec22e81" +func extractLoadBalancerNameFromHostname(hostname string) string { + firstLabel := strings.SplitN(hostname, ".", 2)[0] + lastHyphen := strings.LastIndex(firstLabel, "-") + if lastHyphen == -1 { + return firstLabel + } + return firstLabel[:lastHyphen] +} + +func validate_extractLoadBalancerNameFromHostname(t *testing.T) { + // Test case: validate the load balancer name extraction function from the DNS hostname. + t.Run("When extracting a load balancer name from a DNS hostname it should drop only the last hyphen segment", func(t *testing.T) { + cases := []struct { + name string + hostname string + want string + }{ + { + name: "NLB with multiple hyphens in name", + hostname: "e2e-v7-fnt8p-ext-9a316db0952d7e14.elb.us-east-1.amazonaws.com", + want: "e2e-v7-fnt8p-ext", + }, + { + name: "NLB with only hashed name and suffix", + hostname: "af1c7bcc09ce1420db0292d91f0dad1f-f4ad6ce6794c3afd.elb.us-east-1.amazonaws.com", + want: "af1c7bcc09ce1420db0292d91f0dad1f", + }, + { + name: "Classic ELB style hostname", + hostname: "a7f9d8c870a2b44c39d9565e2ec22e81-1194117244.us-east-1.elb.amazonaws.com", + want: "a7f9d8c870a2b44c39d9565e2ec22e81", + }, + { + name: "Hostname without dot", + hostname: "foo-bar-baz-123", + want: "foo-bar-baz", + }, + { + name: "Hostname without hyphen", + hostname: "foo.elb.us-east-1.amazonaws.com", + want: "foo", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := extractLoadBalancerNameFromHostname(tc.hostname) + if got != tc.want { + t.Fatalf("unexpected load balancer name extracted from hostname %q: got %q, want %q", 
tc.hostname, got, tc.want) + } + }) + } + }) +} diff --git a/test/e2e/util/util.go b/test/e2e/util/util.go index 1a54923ffb0..858942f303c 100644 --- a/test/e2e/util/util.go +++ b/test/e2e/util/util.go @@ -2203,6 +2203,18 @@ func EnsureKubeAPIDNSNameCustomCert(t *testing.T, ctx context.Context, mgmtClien customApiServerHost = fmt.Sprintf("api-custom-cert-%s.%s", entryHostedCluster.Spec.InfraID, serviceDomain) } + // For AWS, use ExternalDNSDomain if set, otherwise fall back to BaseDomain for local development + if entryHostedCluster.Spec.Platform.Type == hyperv1.AWSPlatform { + if clusterOpts.ExternalDNSDomain != "" { + serviceDomain = clusterOpts.ExternalDNSDomain + customApiServerHost = fmt.Sprintf("api-custom-cert-%s.%s", entryHostedCluster.Spec.InfraID, serviceDomain) + } else if clusterOpts.BaseDomain != "" { + // Use base domain for local development environments where external-dns is not available + serviceDomain = clusterOpts.BaseDomain + customApiServerHost = fmt.Sprintf("api-custom-cert-%s.%s", entryHostedCluster.Spec.InfraID, serviceDomain) + t.Logf("Using base domain for custom cert DNS: %s", serviceDomain) + } + } g := NewWithT(t) if !hyperutil.IsPublicHC(entryHostedCluster) { return