diff --git a/cluster/config-defaults.yaml b/cluster/config-defaults.yaml index 03cffcbed4..14d20214a4 100644 --- a/cluster/config-defaults.yaml +++ b/cluster/config-defaults.yaml @@ -356,8 +356,10 @@ skipper_eastwest_dns_log_enabled: "false" # if enabled adds port 8080 as svc port to eastwest svc skipper_ingress_eastwest_additional_port: "false" -# if enabled adds service.kubernetes.io/topology-mode: auto to the eastwest service -skipper_ingress_eastwest_topology_mode_auto: "true" +# if enabled adds service.kubernetes.io/topology-mode: auto to the eastwest service; Kubernetes then enables/disables zone awareness automatically as a safety measure +skipper_ingress_eastwest_topology_mode_auto: "false" +# if enabled adds trafficDistribution: PreferSameZone to the eastwest service; no automatic safety logic, the service is simply made zone aware +skipper_ingress_eastwest_zone_aware_clusterip: "true" # skipper tcp lifo # See: https://opensource.zalando.com/skipper/operation/operation/#tcp-lifo diff --git a/cluster/manifests/02-admission-control/deployment.yaml b/cluster/manifests/02-admission-control/deployment.yaml index 0bd08bc6db..906c4d59e8 100644 --- a/cluster/manifests/02-admission-control/deployment.yaml +++ b/cluster/manifests/02-admission-control/deployment.yaml @@ -49,7 +49,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: admission-controller - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-305 + image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-307 lifecycle: preStop: sleep: diff --git a/cluster/manifests/02-vertical-pod-autoscaler/rbac.yaml b/cluster/manifests/02-vertical-pod-autoscaler/rbac.yaml index abc9859554..c6397c6045 100644 --- a/cluster/manifests/02-vertical-pod-autoscaler/rbac.yaml +++ b/cluster/manifests/02-vertical-pod-autoscaler/rbac.yaml @@ -137,6 +137,12 @@ rules: - pods/eviction verbs: - create + - apiGroups: + - "" + resources: + - pods/resize + verbs: 
+ - patch --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/cluster/manifests/03-skipper-validation-webhook/deployment.yaml b/cluster/manifests/03-skipper-validation-webhook/deployment.yaml index 1fa7ba88b5..207c3b5740 100644 --- a/cluster/manifests/03-skipper-validation-webhook/deployment.yaml +++ b/cluster/manifests/03-skipper-validation-webhook/deployment.yaml @@ -92,7 +92,7 @@ spec: seconds: 10 {{- else if eq .Cluster.ConfigItems.skipper_ingress_swarm_type "valkey" }} - name: valkey-sidecar - image: container-registry.zalando.net/library/valkey-9-alpine:9-alpine3.22-20260330 + image: container-registry.zalando.net/library/valkey-9-alpine:9-alpine3.22-20260414 args: - valkey-server - --save @@ -128,7 +128,7 @@ spec: seconds: 10 {{ end }} - name: skipper-admission-webhook - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.24.64 + image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.24.71 env: {{ if or (eq .Cluster.ConfigItems.skipper_local_tokeninfo "production") (eq .Cluster.ConfigItems.skipper_local_tokeninfo "bridge") }} - name: LOCAL_TOKENINFO diff --git a/cluster/manifests/deployment-service/controller-vpa.yaml b/cluster/manifests/deployment-service/controller-vpa.yaml index 07df7252bc..24ec587b34 100644 --- a/cluster/manifests/deployment-service/controller-vpa.yaml +++ b/cluster/manifests/deployment-service/controller-vpa.yaml @@ -12,7 +12,7 @@ spec: kind: Deployment name: "deployment-service-controller" updatePolicy: - updateMode: Recreate + updateMode: InPlaceOrRecreate resourcePolicy: containerPolicies: - containerName: "deployment-service-controller" diff --git a/cluster/manifests/deployment-service/status-service-deployment.yaml b/cluster/manifests/deployment-service/status-service-deployment.yaml index c666475e4e..7996fbd4f5 100644 --- a/cluster/manifests/deployment-service/status-service-deployment.yaml +++ 
b/cluster/manifests/deployment-service/status-service-deployment.yaml @@ -10,7 +10,7 @@ metadata: application: "deployment-service" component: "status-service" spec: - replicas: 3 + replicas: 2 selector: matchLabels: application: "deployment-service" @@ -73,11 +73,11 @@ spec: name: http resources: requests: - cpu: "10m" - memory: "7Gi" + cpu: "100m" + memory: "1Gi" limits: - cpu: "10m" - memory: "7Gi" + cpu: "100m" + memory: "1Gi" readinessProbe: httpGet: port: 8080 diff --git a/cluster/manifests/deployment-service/status-service-vpa.yaml b/cluster/manifests/deployment-service/status-service-vpa.yaml index 1c4d9be45e..10f11406e6 100644 --- a/cluster/manifests/deployment-service/status-service-vpa.yaml +++ b/cluster/manifests/deployment-service/status-service-vpa.yaml @@ -12,7 +12,7 @@ spec: kind: Deployment name: "deployment-service-status-service" updatePolicy: - updateMode: Recreate + updateMode: InPlaceOrRecreate resourcePolicy: containerPolicies: - containerName: "deployment-service-status-service" diff --git a/cluster/manifests/flannel/daemonset.yaml b/cluster/manifests/flannel/daemonset.yaml index 52f152d83c..49395356cd 100644 --- a/cluster/manifests/flannel/daemonset.yaml +++ b/cluster/manifests/flannel/daemonset.yaml @@ -69,7 +69,7 @@ spec: failureThreshold: 30 periodSeconds: 10 - name: kube-flannel - image: container-registry.zalando.net/teapot/flannel:v0.28.2-master-46 + image: container-registry.zalando.net/teapot/flannel:v0.28.4-master-47 command: - /opt/bin/flanneld args: diff --git a/cluster/manifests/kube-metrics-adapter/deployment.yaml b/cluster/manifests/kube-metrics-adapter/deployment.yaml index 42faec6e73..54abe32b7b 100644 --- a/cluster/manifests/kube-metrics-adapter/deployment.yaml +++ b/cluster/manifests/kube-metrics-adapter/deployment.yaml @@ -27,7 +27,7 @@ spec: serviceAccountName: custom-metrics-apiserver containers: - name: kube-metrics-adapter - image: container-registry.zalando.net/teapot/kube-metrics-adapter:v0.2.8 + image: 
container-registry.zalando.net/teapot/kube-metrics-adapter:v0.2.8-16-g6d6c08e env: - name: AWS_REGION value: {{ .Cluster.Region }} diff --git a/cluster/manifests/role-sync-controller/cronjob.yaml b/cluster/manifests/role-sync-controller/cronjob.yaml index a532b82c2a..adb8398021 100644 --- a/cluster/manifests/role-sync-controller/cronjob.yaml +++ b/cluster/manifests/role-sync-controller/cronjob.yaml @@ -33,7 +33,7 @@ spec: restartPolicy: Never containers: - name: role-sync-controller - image: container-registry.zalando.net/teapot/role-sync-controller:main-21 + image: container-registry.zalando.net/teapot/role-sync-controller:main-24 args: - --subject-group=PowerUser - --subject-group=Manual diff --git a/cluster/manifests/sandbox-controller/30-deployment.yaml b/cluster/manifests/sandbox-controller/30-deployment.yaml index a220b1d5ce..c6dafcbeab 100644 --- a/cluster/manifests/sandbox-controller/30-deployment.yaml +++ b/cluster/manifests/sandbox-controller/30-deployment.yaml @@ -1,4 +1,4 @@ -# {{ $image := "container-registry.zalando.net/gwproxy/sandbox-controller:main-65" }} +# {{ $image := "container-registry.zalando.net/gwproxy/sandbox-controller:main-69" }} # {{ $version := index (split $image ":") 1 }} {{ if eq .Cluster.ConfigItems.sandbox_controller_enabled "true" }} diff --git a/cluster/manifests/shadow-traffic-controller/30-deployment.yaml b/cluster/manifests/shadow-traffic-controller/30-deployment.yaml index 38d1b1cbcd..568dd1308f 100644 --- a/cluster/manifests/shadow-traffic-controller/30-deployment.yaml +++ b/cluster/manifests/shadow-traffic-controller/30-deployment.yaml @@ -1,4 +1,4 @@ -# {{ $image := "container-registry.zalando.net/gwproxy/shadow-traffic-controller:main-24" }} +# {{ $image := "container-registry.zalando.net/gwproxy/shadow-traffic-controller:main-29" }} # {{ $version := index (split $image ":") 1 }} # {{ if eq .Cluster.ConfigItems.shadow_traffic_controller_enabled "true" }} apiVersion: apps/v1 diff --git 
a/cluster/manifests/skipper/deployment.yaml b/cluster/manifests/skipper/deployment.yaml index 652a35c4af..c93fc0e2b8 100644 --- a/cluster/manifests/skipper/deployment.yaml +++ b/cluster/manifests/skipper/deployment.yaml @@ -1,6 +1,6 @@ {{/* image-updater-bot detects *image variables so use name with suffix to disable it for the main image */}} -{{ $main_image_updated_manually := "container-registry.zalando.net/teapot/skipper-internal:v0.24.58-1387" }} +{{ $main_image_updated_manually := "container-registry.zalando.net/teapot/skipper-internal:v0.24.64-1393" }} {{ $canary_image := "container-registry.zalando.net/teapot/skipper-internal:v0.24.64-1393" }} {{/* Allow to override manually canary image by config item */}} diff --git a/cluster/manifests/skipper/service-eks-internal.yaml b/cluster/manifests/skipper/service-eks-internal.yaml index 45f2fa4955..0106ffd89a 100644 --- a/cluster/manifests/skipper/service-eks-internal.yaml +++ b/cluster/manifests/skipper/service-eks-internal.yaml @@ -5,6 +5,7 @@ metadata: annotations: external-dns.alpha.kubernetes.io/hostname: skipper-ingress-eks.{{ .Values.hosted_zone }} service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags: application=skipper-ingress,component=ingress + service.beta.kubernetes.io/aws-load-balancer-attributes: dns_record.client_routing_policy=availability_zone_affinity,load_balancing.cross_zone.enabled=false service.beta.kubernetes.io/aws-load-balancer-backend-protocol: tcp # SG of the old cluster worker nodes service.beta.kubernetes.io/aws-load-balancer-extra-security-groups: {{ .Cluster.ConfigItems.worker_sg_legacy_cluster }} diff --git a/cluster/manifests/skipper/service-internal.yaml b/cluster/manifests/skipper/service-internal.yaml index e0446354c9..07b23597f0 100644 --- a/cluster/manifests/skipper/service-internal.yaml +++ b/cluster/manifests/skipper/service-internal.yaml @@ -12,6 +12,9 @@ metadata: component: ingress spec: type: ClusterIP +{{- if eq 
.Cluster.ConfigItems.skipper_ingress_eastwest_zone_aware_clusterip "true" }} + trafficDistribution: PreferSameZone +{{- end}} {{- if eq .Cluster.Provider "zalando-eks" }} clusterIP: {{ nthAddressFromCIDR .Cluster.ConfigItems.service_cidr 50 }} {{- else}} diff --git a/cluster/manifests/skipper/skipper-valkey.yaml b/cluster/manifests/skipper/skipper-valkey.yaml index 3d0db1026a..4697669844 100644 --- a/cluster/manifests/skipper/skipper-valkey.yaml +++ b/cluster/manifests/skipper/skipper-valkey.yaml @@ -1,4 +1,4 @@ -# {{ $image := "container-registry.zalando.net/library/valkey-9-alpine:9-alpine3.22-20260330" }} +# {{ $image := "container-registry.zalando.net/library/valkey-9-alpine:9-alpine3.22-20260413" }} # {{ $version := index (split $image ":") 1 }} {{- if eq .Cluster.ConfigItems.skipper_ingress_swarm_type "valkey" }} apiVersion: apps/v1 diff --git a/cluster/node-pools/master-default/userdata.yaml b/cluster/node-pools/master-default/userdata.yaml index 418e325484..86184553ae 100644 --- a/cluster/node-pools/master-default/userdata.yaml +++ b/cluster/node-pools/master-default/userdata.yaml @@ -216,7 +216,7 @@ write_files: limits: memory: {{ .Values.InstanceInfo.MemoryFraction (parseInt64 .Cluster.ConfigItems.apiserver_memory_limit_percent)}} {{- end }} - - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-305 + - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-307 name: admission-controller lifecycle: preStop: @@ -406,7 +406,7 @@ write_files: value: {{ .Cluster.ConfigItems.apiserver_business_partner_ids }} {{ end }} - name: skipper-proxy - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.24.64 + image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.24.66 args: - skipper - -access-log-strip-query @@ -457,7 +457,7 @@ write_files: name: ssl-certs-kubernetes 
readOnly: true - name: skipper-metrics - image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.24.64 + image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.24.66 args: - skipper - -access-log-strip-query diff --git a/test/e2e/README.md b/test/e2e/README.md index 8ebfc29621..bd7463bf52 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -205,8 +205,13 @@ Follow up code, that waits for creations to be happen: make ``` - This will setup the go modules correctly and build a binary - `e2e.test`. + This will build a binary `e2e.test`. + + Install `ginkgo` if you haven't already: + + ```bash + make deps + ``` Run all Zalando tests from your local build: @@ -220,5 +225,20 @@ Follow up code, that waits for creations to be happen: -allowed-not-ready-nodes=-1 ``` + To run a _single test_, which is often the most useful, do it like this: + + ```bash + # S3_AWS_IAM_BUCKET and AWS_IAM_ROLE are required for the AWS-IAM tests. + KUBECONFIG=~/.kube/config HOSTED_ZONE=example.org CLUSTER_ALIAS=example \ + S3_AWS_IAM_BUCKET=zalando-e2e-aws-iam-test-12345678912-kube-1 \ + AWS_IAM_ROLE=kube-1-e2e-aws-iam-test \ + ginkgo -procs=1 -flake-attempts=2 -focus="name of test" \ + e2e.test -- -non-blocking-taints=node.kubernetes.io/role,nvidia.com/gpu,dedicated \ + -allowed-not-ready-nodes=-1 + ``` + + Note that the flag `-procs` is set to `1` in this case. If it's set to `25` while + focusing on only one test, the run will just hang. + [ginkgo]: https://onsi.github.io/ginkgo/