diff --git a/bindata/network/ovn-kubernetes/managed/004-config.yaml b/bindata/network/ovn-kubernetes/managed/common/004-config.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/managed/004-config.yaml rename to bindata/network/ovn-kubernetes/managed/common/004-config.yaml diff --git a/bindata/network/ovn-kubernetes/managed/005-service.yaml b/bindata/network/ovn-kubernetes/managed/common/005-service.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/managed/005-service.yaml rename to bindata/network/ovn-kubernetes/managed/common/005-service.yaml diff --git a/bindata/network/ovn-kubernetes/managed/007-certificate.yaml b/bindata/network/ovn-kubernetes/managed/common/007-certificate.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/managed/007-certificate.yaml rename to bindata/network/ovn-kubernetes/managed/common/007-certificate.yaml diff --git a/bindata/network/ovn-kubernetes/managed/008-route.yaml b/bindata/network/ovn-kubernetes/managed/common/008-route.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/managed/008-route.yaml rename to bindata/network/ovn-kubernetes/managed/common/008-route.yaml diff --git a/bindata/network/ovn-kubernetes/managed/alert-rules-control-plane.yaml b/bindata/network/ovn-kubernetes/managed/common/alert-rules-control-plane.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/managed/alert-rules-control-plane.yaml rename to bindata/network/ovn-kubernetes/managed/common/alert-rules-control-plane.yaml diff --git a/bindata/network/ovn-kubernetes/managed/monitor-master.yaml b/bindata/network/ovn-kubernetes/managed/common/monitor-master.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/managed/monitor-master.yaml rename to bindata/network/ovn-kubernetes/managed/common/monitor-master.yaml diff --git a/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-cluster-manager.yaml 
b/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-cluster-manager.yaml new file mode 100644 index 0000000000..fe22556118 --- /dev/null +++ b/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-cluster-manager.yaml @@ -0,0 +1,281 @@ +# The ovnkube control-plane components + +{{ if not .IsSNO }} +# The pod disruption budget ensures that we keep a raft quorum +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: ovn-raft-quorum-guard + namespace: {{.HostedClusterNamespace}} + annotations: + network.operator.openshift.io/cluster-name: {{.ManagementClusterName}} +spec: + minAvailable: {{.OVN_MIN_AVAILABLE}} + selector: + matchLabels: + app: ovnkube-master +--- +{{ end }} +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: ovnkube-master + namespace: {{.HostedClusterNamespace}} + annotations: + network.operator.openshift.io/cluster-name: {{.ManagementClusterName}} + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes controller (master) networking components. + release.openshift.io/version: "{{.ReleaseVersion}}" + labels: + # used by PodAffinity to prefer co-locating pods that belong to the same hosted cluster. 
+ hypershift.openshift.io/hosted-control-plane: {{.HostedClusterNamespace}} +spec: + podManagementPolicy: Parallel + selector: + matchLabels: + app: ovnkube-master + serviceName: ovnkube-master-internal + volumeClaimTemplates: + replicas: {{.OvnkubeMasterReplicas}} + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-master + ovn-db-pod: "true" + component: network + type: infra + openshift.io/component: network + hypershift.openshift.io/control-plane-component: ovnkube-master + kubernetes.io/os: "linux" + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hypershift.openshift.io/control-plane + operator: In + values: + - "true" + - weight: 100 + preference: + matchExpressions: + - key: hypershift.openshift.io/cluster + operator: In + values: + - {{.HostedClusterNamespace}} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: ovnkube-master + topologyKey: topology.kubernetes.io/zone + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + hypershift.openshift.io/hosted-control-plane: {{.HostedClusterNamespace}} + topologyKey: kubernetes.io/hostname + priorityClassName: hypershift-api-critical + initContainers: + # Remove once https://github.com/kubernetes/kubernetes/issues/85966 is addressed + - name: init-ip + command: + - /bin/bash + - -c + - | + cat <<-EOF + Kubelet only sets a pod's Status.PodIP when all containers of the pod have started at least once (successfully or unsuccessfully) + or at least one of the initContainers finished. + Container start is blocked by postStart hooks. See https://github.com/kubernetes/kubernetes/issues/85966 for more details. 
+ The NB and SB DB postStart hooks block until the DBs join the RAFT cluster or until a timeout is reached. + In a standalone cluster every pod is host networked and the DBs use host IP to communicate between the RAFT members. + In HyperShift OVN-Kubernetes master is run as a statefulset and the pods are not host networked, meaning we cannot rely on the podIP not changing. + To provide a stable network identity for each pod in the statefulset we use a headless service, + the downside of this approach is the DNS entry for the pod will only start to work after the pod has its Status.PodIP set. + + Until https://github.com/kubernetes/kubernetes/issues/85966 is fixed use a no-op init container as a workaround. + This allows for pod-pod connectivity in postStart hooks the first time they run. + EOF + image: "{{.OvnImage}}" + containers: + # token-minter creates a token with the default service account path + # The token is read by ovn-k containers to authenticate against the hosted cluster api server + - name: token-minter + image: "{{.TokenMinterImage}}" + command: ["/usr/bin/control-plane-operator", "token-minter"] + args: + - --service-account-namespace=openshift-ovn-kubernetes + - --service-account-name=ovn-kubernetes-controller + - --token-audience={{.TokenAudience}} + - --token-file=/var/run/secrets/hosted_cluster/token + - --kubeconfig=/etc/kubernetes/kubeconfig + resources: + requests: + cpu: 10m + memory: 30Mi + volumeMounts: + - mountPath: /etc/kubernetes + name: admin-kubeconfig + - mountPath: /var/run/secrets/hosted_cluster + name: hosted-cluster-api-access + # ovnkube master: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-control-plane + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_master" ]]; then + set -o allexport + source "/env/_master" + set +o allexport + fi + + # TLS for ovnkube-master metrics + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + 
TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
+
+          if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then
+            gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex"
+          elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then
+            gateway_mode_flags="--gateway-mode local --gateway-interface br-ex"
+          else
+            echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"."
+            exit 1
+          fi
+
+          retries=0
+          while [ ! -f /var/run/secrets/hosted_cluster/token ]; do
+            (( retries += 1 ))
+            sleep 1
+            if [[ "${retries}" -gt 30 ]]; then
+              echo "$(date -Iseconds) - Hosted cluster token not found"
+              exit 1
+            fi
+          done
+
+          multi_network_enabled_flag=
+          if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then
+            multi_network_enabled_flag="--enable-multi-network"
+          fi
+
+          echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-control-plane - start ovnkube --init-master ${K8S_NODE}"
+          # metrics bind moved off 0.0.0.0:9102; a '#' line inside the continued exec below would end the continuation and drop all later args
+          exec /usr/bin/ovnkube \
+            --init-cluster-manager "${K8S_NODE}" \
+            --config-file=/run/ovnkube-config/ovnkube.conf \
+            --k8s-token-file=/var/run/secrets/hosted_cluster/token \
+            --ovn-empty-lb-events \
+            --loglevel "${OVN_KUBE_LOG_LEVEL}" \
+            --metrics-bind-address "127.0.0.1:29104" \
+            --metrics-enable-pprof \
+            --metrics-enable-config-duration \
+            ${gateway_mode_flags} \
+            --sb-address "{{.OVN_SB_DB_LIST}}" \
+            --sb-client-privkey /ovn-cert/tls.key \
+            --sb-client-cert /ovn-cert/tls.crt \
+            --sb-client-cacert /ovn-ca/ca-bundle.crt \
+            --sb-cert-common-name "{{.OVN_CERT_CN}}" \
+            --nb-address "{{.OVN_NB_DB_LIST}}" \
+            --nb-client-privkey /ovn-cert/tls.key \
+            --nb-client-cert /ovn-cert/tls.crt \
+            --nb-client-cacert /ovn-ca/ca-bundle.crt \
+            --nb-cert-common-name "{{.OVN_CERT_CN}}" \
+            --enable-multicast \
+            --disable-snat-multiple-gws \
+            --node-server-privkey ${TLS_PK} \
+            --node-server-cert ${TLS_CERT} \
+            ${multi_network_enabled_flag} \
+            --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}"
+        volumeMounts:
+        - mountPath: /run/ovnkube-config/
+
name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + - mountPath: /hosted-ca + name: hosted-ca-cert + - mountPath: /var/run/secrets/hosted_cluster + name: hosted-cluster-api-access + - name: ovn-master-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + resources: + requests: + cpu: 10m + memory: 200Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: ALL_PROXY + value: socks5://127.0.0.1:8090 + - name: NO_PROXY + value: kube-apiserver + ports: + - name: metrics-port + containerPort: 29104 + terminationMessagePolicy: FallbackToLogsOnError + {{ if .HCPNodeSelector }} + nodeSelector: + {{ range $key, $value := .HCPNodeSelector }} + "{{$key}}": "{{$value}}" + {{ end }} + {{ end }} + volumes: + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: konnectivity-proxy-ca + configMap: + name: konnectivity-ca-bundle + - name: konnectivity-proxy-cert + secret: + defaultMode: 0640 + secretName: konnectivity-client + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + - name: admin-kubeconfig + secret: + secretName: service-network-admin-kubeconfig + - name: hosted-cluster-api-access + emptyDir: {} + - name: hosted-ca-cert + secret: + secretName: root-ca + items: + - key: ca.crt + path: ca.crt + tolerations: + - key: "hypershift.openshift.io/control-plane" + operator: "Equal" + value: "true" + effect: "NoSchedule" + - key: "hypershift.openshift.io/cluster" + operator: "Equal" + value: {{.HostedClusterNamespace}} + effect: "NoSchedule" diff --git a/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml 
b/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml new file mode 100644 index 0000000000..c3f88990a5 --- /dev/null +++ b/bindata/network/ovn-kubernetes/managed/multi-zone-interconnect/ovnkube-node.yaml @@ -0,0 +1,1044 @@ +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-node + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes per node networking components. + release.openshift.io/version: "{{.ReleaseVersion}}" +spec: + selector: + matchLabels: + app: ovnkube-node + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 10% + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-node + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: network.operator.openshift.io/dpu-host + {{ if eq .OVN_NODE_MODE "dpu-host" }} + operator: Exists + {{ else }} + operator: DoesNotExist + {{ end }} + - key: network.operator.openshift.io/dpu + operator: DoesNotExist + serviceAccountName: ovn-kubernetes-controller + hostNetwork: true + dnsPolicy: Default + hostPID: true + priorityClassName: "system-node-critical" + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /etc/openvswitch - ovsdb system id + # /var/lib/openvswitch -> /var/lib/openvswitch/data - ovsdb data + # /run/openvswitch -> tmpfs - ovsdb sockets + # /env -> configmap env-overrides - debug overrides + containers: + {{ if eq .OVN_NODE_MODE "full" }} + # ovn-controller: programs the vswitch with flows from the sbdb + - name: ovn-controller + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -e + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source 
"/env/${K8S_NODE}" + set +o allexport + fi + + echo "$(date -Iseconds) - starting ovn-controller" + exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \ + --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \ + --syslog-method="{{.OVNPolicyAuditDestination}}" \ + --log-file=/var/log/ovn/acl-audit-log.log \ + -vFACILITY:"{{.OVNPolicyAuditSyslogFacility}}" \ + -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \ + -vconsole:"${OVN_LOG_LEVEL}" -vconsole:"acl_log:off" \ + -vPATTERN:console:"{{.OVN_LOG_PATTERN_CONSOLE}}" \ + -vsyslog:"acl_log:info" \ + -vfile:"acl_log:info" + securityContext: + privileged: true + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + - mountPath: /var/log/ovn/ + name: node-log + - mountPath: /dev/log + name: log-socket + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 300Mi + - name: ovn-acl-logging + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -euo pipefail + + # Rotate audit log files when then get to max size (in bytes) + MAXFILESIZE=$(( "{{.OVNPolicyAuditMaxFileSize}}"*1000000 )) + LOGFILE=/var/log/ovn/acl-audit-log.log + CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) + + # Redirect err to null so no messages are shown upon rotation + tail -F ${LOGFILE} 2> /dev/null & + + while true + do + # Make sure ovn-controller's logfile exists, and get current size in bytes + if [ -f "$LOGFILE" ]; then + file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` + else + ovs-appctl -t 
/var/run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen + file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` + fi + + if [ $file_size -gt $MAXFILESIZE ];then + echo "Rotating OVN ACL Log File" + timestamp=`date '+%Y-%m-%dT%H-%M-%S'` + mv ${LOGFILE} /var/log/ovn/acl-audit-log.$timestamp.log + ovs-appctl -t /run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen + CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) + fi + + # sleep for 30 seconds to avoid wasting CPU + sleep 30 + done + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/log/ovn/ + name: node-log + - mountPath: /run/ovn/ + name: run-ovn + {{ end }} + - name: kube-rbac-proxy-node + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. + # If it isn't created there is probably an issue so we want to crashloop. + retries=0 + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! 
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9103 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29103/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9103 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-node-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + - name: kube-rbac-proxy-ovn-metrics + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. + # If it isn't created there is probably an issue so we want to crashloop. + retries=0 + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! 
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9105 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29105/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9105 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-node-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + # ovn-northd: convert network objects in nbdb to flows in sbdb + - name: northd + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping ovn-northd" + OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + echo "$(date -Iseconds) - ovn-northd stopped" + rm -f /var/run/ovn/ovn-northd.pid + exit 0 + } + # end of quit + trap quit TERM INT + + echo "$(date -Iseconds) - starting ovn-northd" + exec ovn-northd \ + --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + --pidfile /var/run/ovn/ovn-northd.pid & + + wait $! 
+        lifecycle:
+          preStop:
+            exec:
+              command:
+              - /bin/bash
+              - -c
+              - OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd # exec hooks run argv directly; env assignment needs a shell
+        env:
+        - name: OVN_LOG_LEVEL
+          value: info
+        volumeMounts:
+        - mountPath: /etc/openvswitch/
+          name: etc-openvswitch
+        - mountPath: /var/lib/openvswitch/
+          name: var-lib-openvswitch
+        - mountPath: /run/openvswitch/
+          name: run-openvswitch
+        - mountPath: /run/ovn/
+          name: run-ovn
+        - mountPath: /env
+          name: env-overrides
+        - mountPath: /ovn-cert # not needed, but useful when exec'ing in to pod.
+          name: ovn-cert
+        - mountPath: /ovn-ca
+          name: ovn-ca
+        resources:
+          requests:
+            cpu: 10m
+            memory: 300Mi
+        terminationMessagePolicy: FallbackToLogsOnError
+
+      # nbdb: the northbound, or logical network object DB. In standalone mode listening on unix socket.
+      - name: nbdb
+        image: "{{.OvnImage}}"
+        command:
+        - /bin/bash
+        - -c
+        - |
+          set -xem
+          if [[ -f /env/_master ]]; then
+            set -o allexport
+            source /env/_master
+            set +o allexport
+          fi
+
+          quit() {
+            echo "$(date -Iseconds) - stopping nbdb"
+            /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb
+            echo "$(date -Iseconds) - nbdb stopped"
+            rm -f /var/run/ovn/ovnnb_db.pid
+            exit 0
+          }
+          # end of quit
+          trap quit TERM INT
+
+          bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac }
+
+          # checks if a db pod is part of a current cluster
+
+          OVN_ARGS="--no-monitor"
+          exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \
+            --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \
+            run_nb_ovsdb &
+
+          wait $!
+
+        lifecycle:
+          postStart:
+            exec:
+              command:
+              - /bin/bash
+              - -c
+              - |
+                set -x
+                rm -f /var/run/ovn/ovnnb_db.pid
+                #configure northd_probe_interval
+                OVN_NB_CTL="ovn-nbctl"
+
+                echo "Setting the IC zone to ${K8S_NODE}"
+                retries=0
+                current_probe_interval=0
+                while [[ "${retries}" -lt 10 ]]; do
+                  current_probe_interval=$(${OVN_NB_CTL} set NB_Global . name="${K8S_NODE}" options:name="${K8S_NODE}")
+                  if [[ $? == 0 ]]; then
+                    current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"')
+                    break
+                  else
+                    sleep 2
+                    (( retries += 1 ))
+                  fi
+                done
+
+                northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-5000}
+                echo "Setting northd probe interval to ${northd_probe_interval} ms"
+                retries=0
+                current_probe_interval=0
+                while [[ "${retries}" -lt 10 ]]; do
+                  current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval)
+                  if [[ $? == 0 ]]; then
+                    current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"')
+                    break
+                  else
+                    sleep 2
+                    (( retries += 1 ))
+                  fi
+                done
+
+                if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then
+                  retries=0
+                  while [[ "${retries}" -lt 10 ]]; do
+                    ${OVN_NB_CTL} set NB_GLOBAL . options:northd_probe_interval=${northd_probe_interval}
+                    if [[ $? != 0 ]]; then
+                      echo "Failed to set northd probe interval to ${northd_probe_interval}. retrying....."
+                      sleep 2
+                      (( retries += 1 ))
+                    else
+                      echo "Successfully set northd probe interval to ${northd_probe_interval} ms"
+                      break
+                    fi
+                  done
+                fi
+
+          preStop:
+            exec:
+              command:
+              - /usr/bin/ovn-appctl
+              - -t
+              - /var/run/ovn/ovnnb_db.ctl
+              - exit
+        readinessProbe:
+{{ if not .IsSNO }}
+          initialDelaySeconds: 90
+{{ end }}
+          timeoutSeconds: 5
+          exec:
+            command:
+            - /bin/bash
+            - -c
+            - |
+              set -xeo pipefail
+              status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=3 ovsdb-server/sync-status 2>/dev/null | { grep "state: active" || false; })
+              if [[ -z "${status}" ]]; then
+                echo "NB DB is not running or active."
+ exit 1 + fi + + env: + - name: OVN_LOG_LEVEL + value: info + - name: OVN_NORTHD_PROBE_INTERVAL + value: "{{.OVN_NORTHD_PROBE_INTERVAL}}" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + # sbdb: the southbound, or logical network object DB. In standalone mode listening on unix socket + - name: sbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + + OVN_ARGS="--no-monitor" + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_sb_ovsdb & + + wait $! 
+ + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + rm -f /var/run/ovn/ovnsb_db.pid + + preStop: + exec: + command: + - /usr/bin/ovn-appctl + - -t + - /var/run/ovn/ovnsb_db.ctl + - exit + readinessProbe: +{{ if not .IsSNO }} + initialDelaySeconds: 90 +{{ end }} + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=3 ovsdb-server/sync-status 2>/dev/null | { grep "state: active" || false; }) + if [[ -z "${status}" ]]; then + echo "SB DB is not running or active." + exit 1 + fi + + env: + - name: OVN_LOG_LEVEL + value: info + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + - name: kube-rbac-proxy-ncm + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-master-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-master-metrics-cert not mounted. Waiting 20 minutes. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! 
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-master-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9113 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29113/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9113 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-master-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + + # ovnkube-controller in multizone mode: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-controller + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_local" ]]; then + set -o allexport + source "/env/_local" + set +o allexport + fi + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
+ exit 1 + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + multi_network_policy_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then + multi_network_policy_enabled_flag="--enable-multi-networkpolicy" + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-controller - start ovnkube init-ovnkube-controller ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-ovnkube-controller "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --ovn-empty-lb-events \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --metrics-bind-address "127.0.0.1:29113" \ + --metrics-enable-pprof \ + --metrics-enable-config-duration \ + ${gateway_mode_flags} \ + --disable-snat-multiple-gws \ + ${multi_network_enabled_flag} \ + ${multi_network_policy_enabled_flag} \ + --enable-multicast \ + --zone ${K8S_NODE} \ + --enable-interconnect \ + --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - name: metrics-port + containerPort: 29113 + terminationMessagePolicy: FallbackToLogsOnError + + # ovnkube-node: does node-level bookkeeping and configuration + - name: ovnkube-node + image: "{{.OvnImage}}" + command: + 
- /bin/bash + - -c + - | + set -xe + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/ + ovn_config_namespace=openshift-ovn-kubernetes + echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" + iptables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK + iptables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + ip6tables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK + ip6tables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node" + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
+ exit 1 + fi + + export_network_flows_flags= + if [[ -n "${NETFLOW_COLLECTORS}" ]] ; then + export_network_flows_flags="--netflow-targets ${NETFLOW_COLLECTORS}" + fi + if [[ -n "${SFLOW_COLLECTORS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --sflow-targets ${SFLOW_COLLECTORS}" + fi + if [[ -n "${IPFIX_COLLECTORS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-targets ${IPFIX_COLLECTORS}" + fi + if [[ -n "${IPFIX_CACHE_MAX_FLOWS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-cache-max-flows ${IPFIX_CACHE_MAX_FLOWS}" + fi + if [[ -n "${IPFIX_CACHE_ACTIVE_TIMEOUT}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-cache-active-timeout ${IPFIX_CACHE_ACTIVE_TIMEOUT}" + fi + if [[ -n "${IPFIX_SAMPLING}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-sampling ${IPFIX_SAMPLING}" + fi + gw_interface_flag= + # if br-ex1 is configured on the node, we want to use it for external gateway traffic + if [ -d /sys/class/net/br-ex1 ]; then + gw_interface_flag="--exgw-interface=br-ex1" + fi + + node_mgmt_port_netdev_flags= + if [[ -n "${OVNKUBE_NODE_MGMT_PORT_NETDEV}" ]] ; then + node_mgmt_port_netdev_flags="--ovnkube-node-mgmt-port-netdev ${OVNKUBE_NODE_MGMT_PORT_NETDEV}" + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + multi_network_policy_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then + multi_network_policy_enabled_flag="--enable-multi-networkpolicy" + fi + + exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \ + ${gateway_mode_flags} \ + ${node_mgmt_port_netdev_flags} \ + {{- if eq .OVN_NODE_MODE "dpu-host" }} + --ovnkube-node-mode dpu-host \ + {{- end }} 
+ --metrics-bind-address "127.0.0.1:29103" \ + --ovn-metrics-bind-address "127.0.0.1:29105" \ + --metrics-enable-pprof \ + --export-ovs-metrics \ + --disable-snat-multiple-gws \ + ${export_network_flows_flags} \ + ${multi_network_enabled_flag} \ + ${multi_network_policy_enabled_flag} \ + --zone ${K8S_NODE} \ + --enable-interconnect \ + ${gw_interface_flag} + env: + # for kubectl + - name: KUBERNETES_SERVICE_PORT + value: "{{.KUBERNETES_SERVICE_PORT}}" + - name: KUBERNETES_SERVICE_HOST + value: "{{.KUBERNETES_SERVICE_HOST}}" + - name: OVN_CONTROLLER_INACTIVITY_PROBE + value: "{{.OVN_CONTROLLER_INACTIVITY_PROBE}}" + - name: OVN_KUBE_LOG_LEVEL + value: "4" + {{ if .NetFlowCollectors }} + - name: NETFLOW_COLLECTORS + value: "{{.NetFlowCollectors}}" + {{ end }} + {{ if .SFlowCollectors }} + - name: SFLOW_COLLECTORS + value: "{{.SFlowCollectors}}" + {{ end }} + {{ if .IPFIXCollectors }} + - name: IPFIX_COLLECTORS + value: "{{.IPFIXCollectors}}" + {{ end }} + {{ if .IPFIXCacheMaxFlows }} + - name: IPFIX_CACHE_MAX_FLOWS + value: "{{.IPFIXCacheMaxFlows}}" + {{ end }} + {{ if .IPFIXCacheActiveTimeout }} + - name: IPFIX_CACHE_ACTIVE_TIMEOUT + value: "{{.IPFIXCacheActiveTimeout}}" + {{ end }} + {{ if .IPFIXSampling }} + - name: IPFIX_SAMPLING + value: "{{.IPFIXSampling}}" + {{ end }} + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + ports: + - name: metrics-port + containerPort: 29103 + securityContext: + privileged: true + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + # for the CNI server socket + - mountPath: /run/ovn-kubernetes/ + name: host-run-ovn-kubernetes + # accessing bind-mounted net namespaces + - 
mountPath: /run/netns + name: host-run-netns + readOnly: true + mountPropagation: HostToContainer + # for installing the CNI plugin binary + - mountPath: /cni-bin-dir + name: host-cni-bin + # for installing the CNI configuration file + - mountPath: /etc/cni/net.d + name: host-cni-netd + # Where we store IP allocations + - mountPath: /var/lib/cni/networks/ovn-k8s-cni-overlay + name: host-var-lib-cni-networks-ovn-kubernetes + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + lifecycle: + preStop: + exec: + command: ["rm","-f","/etc/cni/net.d/10-ovn-kubernetes.conf"] + readinessProbe: + exec: + command: ["test", "-f", "/etc/cni/net.d/10-ovn-kubernetes.conf"] + initialDelaySeconds: 5 + periodSeconds: 30 + {{- if .OVNPlatformAzure}} + - name: drop-icmp + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + + touch /var/run/ovn/add_iptables.sh + chmod 0755 /var/run/ovn/add_iptables.sh + cat <<'EOF' > /var/run/ovn/add_iptables.sh + #!/bin/sh + if [ -z "$3" ] + then + echo "Called with host address missing, ignore" + exit 0 + fi + echo "Adding ICMP drop rule for '$3' " + if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION + then + echo "iptables already set for $3" + else + iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION + fi + EOF + + echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" + iptables -X CHECK_ICMP_SOURCE || true + iptables -N CHECK_ICMP_SOURCE || true + iptables -F CHECK_ICMP_SOURCE + iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j 
CHECK_ICMP_SOURCE || true + iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE + iptables -N ICMP_ACTION || true + iptables -F ICMP_ACTION + iptables -A ICMP_ACTION -j LOG + iptables -A ICMP_ACTION -j DROP + # + ip addr show + ip route show + iptables -nvL + iptables -nvL -t nat + oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + lifecycle: + preStop: + exec: + command: ["/bin/bash", "-c", "echo drop-icmp done"] + securityContext: + privileged: true + volumeMounts: + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + - mountPath: /run/ovn/ + name: run-ovn + resources: + requests: + cpu: 5m + memory: 20Mi + env: + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + {{- end}} + nodeSelector: + beta.kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + # used for iptables wrapper scripts + - name: host-slash + hostPath: + path: / + - name: host-run-netns + hostPath: + path: /run/netns + - name: var-lib-openvswitch + hostPath: + path: /var/lib/openvswitch/data + - name: etc-openvswitch + hostPath: + path: /var/lib/ovn-ic/etc # different path than 4.13 and single-zone 4.14 + - name: run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn-ic # different path than 4.13 and single-zone 4.14 + {{ if eq .OVN_NODE_MODE "full" }} + # Used for placement of ACL audit logs + - name: node-log + hostPath: + path: /var/log/ovn-ic # different path than 4.13 and single-zone 4.14 + - name: log-socket + hostPath: + path: /dev/log + {{ end }} + # For CNI server + - name: 
host-run-ovn-kubernetes + hostPath: + path: /run/ovn-kubernetes + - name: host-cni-bin + hostPath: + path: "{{.CNIBinDir}}" + - name: host-cni-netd + hostPath: + path: "{{.CNIConfDir}}" + - name: host-var-lib-cni-networks-ovn-kubernetes + hostPath: + path: /var/lib/cni/networks/ovn-k8s-cni-overlay + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-node-metrics-cert + secret: + secretName: ovn-node-metrics-cert + optional: true + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + optional: true + tolerations: + - operator: "Exists" diff --git a/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml b/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml new file mode 100644 index 0000000000..83036023a1 --- /dev/null +++ b/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-master.yaml @@ -0,0 +1,1104 @@ +# The ovnkube control-plane components + +{{ if not .IsSNO }} +# The pod disruption budget ensures that we keep a raft quorum +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: ovn-raft-quorum-guard + namespace: {{.HostedClusterNamespace}} + annotations: + network.operator.openshift.io/cluster-name: {{.ManagementClusterName}} +spec: + minAvailable: {{.OVN_MIN_AVAILABLE}} + selector: + matchLabels: + app: ovnkube-master +--- +{{ end }} +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: ovnkube-master + namespace: {{.HostedClusterNamespace}} + annotations: + network.operator.openshift.io/cluster-name: {{.ManagementClusterName}} + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes controller (master) networking components. 
+ release.openshift.io/version: "{{.ReleaseVersion}}" + labels: + # used by PodAffinity to prefer co-locating pods that belong to the same hosted cluster. + hypershift.openshift.io/hosted-control-plane: {{.HostedClusterNamespace}} +spec: + podManagementPolicy: Parallel + selector: + matchLabels: + app: ovnkube-master + serviceName: ovnkube-master-internal + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: datadir + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + volumeMode: Filesystem + replicas: {{.OvnkubeMasterReplicas}} + template: + metadata: + annotations: + hypershift.openshift.io/release-image: {{.ReleaseImage}} + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes: "hosted-cluster-api-access" + labels: + app: ovnkube-master + ovn-db-pod: "true" + component: network + type: infra + openshift.io/component: network + hypershift.openshift.io/control-plane-component: ovnkube-master + kubernetes.io/os: "linux" + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hypershift.openshift.io/control-plane + operator: In + values: + - "true" + - weight: 100 + preference: + matchExpressions: + - key: hypershift.openshift.io/cluster + operator: In + values: + - {{.HostedClusterNamespace}} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: ovnkube-master + topologyKey: topology.kubernetes.io/zone + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + hypershift.openshift.io/hosted-control-plane: {{.HostedClusterNamespace}} + topologyKey: kubernetes.io/hostname + priorityClassName: hypershift-api-critical + initContainers: + # Remove once 
https://github.com/kubernetes/kubernetes/issues/85966 is addressed + - name: init-ip + command: + - /bin/bash + - -c + - | + cat <<-EOF + Kubelet only sets a pod's Status.PodIP when all containers of the pod have started at least once (successfully or unsuccessfully) + or at least one of the initContainers finished. + Container start is blocked by postStart hooks. See https://github.com/kubernetes/kubernetes/issues/85966 for more details. + The NB and SB DB postStart hooks block until the DBs join the RAFT cluster or until a timeout is reached. + In a standalone cluster every pod is host networked and the DBs use host IP to communicate between the RAFT members. + In HyperShift OVN-Kubernetes master is run as a statefulset and the pods are not host networked, meaning we cannot rely on the podIP not changing. + To provide a stable network identity for each pod in the statefulset we use a headless service, + the downside of this approach is the DNS entry for the pod will only start to work after the pod has its Status.PodIP set. + + Until https://github.com/kubernetes/kubernetes/issues/85966 is fixed use a no-op init container as a workaround. + This allows for pod-pod connectivity in postStart hooks the first time they run. 
+ EOF + image: "{{.OvnImage}}" + containers: + # token-minter creates a token with the default service account path + # The token is read by ovn-k containers to authenticate against the hosted cluster api server + - name: token-minter + image: "{{.TokenMinterImage}}" + command: ["/usr/bin/control-plane-operator", "token-minter"] + args: + - --service-account-namespace=openshift-ovn-kubernetes + - --service-account-name=ovn-kubernetes-controller + - --token-audience={{.TokenAudience}} + - --token-file=/var/run/secrets/hosted_cluster/token + - --kubeconfig=/etc/kubernetes/kubeconfig + resources: + requests: + cpu: 10m + memory: 30Mi + volumeMounts: + - mountPath: /etc/kubernetes + name: admin-kubeconfig + - mountPath: /var/run/secrets/hosted_cluster + name: hosted-cluster-api-access + # ovn-northd: convert network objects in nbdb to flows in sbdb + - name: northd + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping ovn-northd" + OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + echo "$(date -Iseconds) - ovn-northd stopped" + rm -f /var/run/ovn/ovn-northd.pid + exit 0 + } + # end of quit + trap quit TERM INT + + echo "$(date -Iseconds) - starting ovn-northd" + exec ovn-northd \ + --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + --ovnnb-db "{{.OVN_NB_DB_LIST}}" \ + --ovnsb-db "{{.OVN_SB_DB_LIST}}" \ + --pidfile /var/run/ovn/ovn-northd.pid \ + --n-threads={{.NorthdThreads}} \ + -p /ovn-cert/tls.key \ + -c /ovn-cert/tls.crt \ + -C /ovn-ca/ca-bundle.crt & + + wait $! 
+ lifecycle: + preStop: + exec: + command: + - /bin/bash + - -c + - OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + env: + - name: OVN_LOG_LEVEL + value: info + volumeMounts: + - mountPath: /var/run/ovn + name: datadir + - mountPath: /etc/ovn + name: datadir + - mountPath: /var/log/ovn + name: datadir + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert # not needed, but useful when exec'ing in to pod. + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 70Mi + terminationMessagePolicy: FallbackToLogsOnError + + # nbdb: the northbound, or logical network object DB. In raft mode + - name: nbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping nbdb" + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb + echo "$(date -Iseconds) - nbdb stopped" + rm -f /var/run/ovn/ovnnb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + + # initialize variables + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt" + transport="ssl" + ovn_raft_conn_ip_url_suffix="" + if [[ "${K8S_POD_IP}" == *":"* ]]; then + ovn_raft_conn_ip_url_suffix=":[::]" + fi + db="nb" + db_port="{{.OVN_NB_PORT}}" + ovn_db_file="/etc/ovn/ovn${db}_db.db" + # checks if a db pod is part of a current cluster + db_part_of_cluster() { + local pod_index=${1} + local db=${2} + local port=${3} + echo "Checking if ovnkube-master-${pod_index} is part of cluster" + + init_ip="ovnkube-master-${pod_index}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + + + + + echo "ovnkube-master-${pod_index} ip: $init_ip" + init_ip=$(bracketify $init_ip) + 
target=$(ovn-${db}ctl --timeout=5 --db=${transport}:${init_ip}:${port} ${ovndb_ctl_ssl_opts} \ + --data=bare --no-headings --columns=target list connection || true) + if [[ "${target}" != "p${transport}:${port}${ovn_raft_conn_ip_url_suffix}" ]]; then + echo "Unable to check correct target ${target} " + return 1 + fi + echo "ovnkube-master-${pod_index} is part of cluster" + return 0 + } + # end of db_part_of_cluster + + # Checks if cluster has already been initialized. + # If not it returns false and sets init_ip to CLUSTER_INITIATOR_IP + cluster_exists() { + local db=${1} + local port=${2} + for ((i=0; i<{{.OvnkubeMasterReplicas}}; i++ )); do + if db_part_of_cluster $i $db $port; then + echo "ovnkube-master-${i} is part of current cluster with ip: ${init_ip}!" + return 0 + fi + done + # if we get here there is no cluster, set init_ip and get out + init_ip=$(bracketify $CLUSTER_INITIATOR_IP) + return 1 + } + # end of cluster_exists() + + # RAFT clusters need an odd number of members to achieve consensus. + # The CNO determines which members make up the cluster, so if this container + # is not supposed to be part of the cluster, wait forever doing nothing + # (instead of exiting and causing CrashLoopBackoffs for no reason). + if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + echo "$(date -Iseconds) - not selected as RAFT member; sleeping..." + sleep 1500d + exit 0 + fi + + OVN_ARGS="--db-nb-cluster-local-port={{.OVN_NB_RAFT_PORT}} \ + --db-nb-cluster-local-addr=${pod_dns_name} \ + --no-monitor \ + --db-nb-cluster-local-proto=ssl \ + --ovn-nb-db-ssl-key=/ovn-cert/tls.key \ + --ovn-nb-db-ssl-cert=/ovn-cert/tls.crt \ + --ovn-nb-db-ssl-ca-cert=/ovn-ca/ca-bundle.crt" + + CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" + echo "$(date -Iseconds) - starting nbdb CLUSTER_INITIATOR_IP=${CLUSTER_INITIATOR_IP}, K8S_NODE_IP=${K8S_NODE_IP}" + initialize="false" + + if [[ ! 
-e ${ovn_db_file} ]]; then + initialize="true" + fi + + if [[ "${initialize}" == "true" ]]; then + # check to see if a cluster already exists. If it does, just join it. + counter=0 + cluster_found=false + while [ $counter -lt 5 ]; do + if cluster_exists ${db} ${db_port}; then + cluster_found=true + break + fi + sleep 1 + counter=$((counter+1)) + done + + if ${cluster_found}; then + echo "Cluster already exists for DB: ${db}" + # join existing cluster + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --db-nb-cluster-remote-port={{.OVN_NB_RAFT_PORT}} \ + --db-nb-cluster-remote-addr=${init_ip} \ + --db-nb-cluster-remote-proto=ssl \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_nb_ovsdb & + + wait $! + else + # either we need to initialize a new cluster or wait for master to create it + if [[ "${pod_dns_name}" == "${CLUSTER_INITIATOR_IP}" ]]; then + # set DB election timer at DB creation time if OVN supports it + election_timer= + if test -n "$(/usr/share/ovn/scripts/ovn-ctl --help 2>&1 | grep "\--db-nb-election-timer")"; then + election_timer="--db-nb-election-timer=$(({{.OVN_NB_RAFT_ELECTION_TIMER}}*1000))" + fi + + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + ${election_timer} \ + run_nb_ovsdb & + + wait $! + else + echo "Joining the nbdb cluster with init_ip=${init_ip}..." + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --db-nb-cluster-remote-port={{.OVN_NB_RAFT_PORT}} \ + --db-nb-cluster-remote-addr=${init_ip} \ + --db-nb-cluster-remote-proto=ssl \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_nb_ovsdb & + + wait $! + fi + fi + else + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_nb_ovsdb & + + wait $! 
+ fi + + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" + rm -f /var/run/ovn/ovnnb_db.pid + + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + + # exit early if this DB is not supposed to be part of the cluster + if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + exit 0 + fi + + # retry an operation a number of times, sleeping 2 seconds between each try + retry() { + local tries=${1} + local desc=${2} + local cmd=${3} + + local retries=0 + while ! ${cmd}; do + (( retries += 1 )) + if [[ "${retries}" -gt ${tries} ]]; then + echo "$(date -Iseconds) - ERROR - nbdb ${desc} - too many failed attempts, giving up" + return 1 + fi + echo "$(date -Iseconds) - WARN - nbdb ${desc} - failed try ${retries}, retrying..." + sleep 2 + done + echo "$(date -Iseconds) - INFO - nbdb ${desc} - success" + return 0 + } + + if [[ "${pod_dns_name}" == "${CLUSTER_INITIATOR_IP}" ]]; then + echo "$(date -Iseconds) - nbdb - postStart - waiting for master to be selected" + + # set the connection and inactivity probe + if ! retry 60 "inactivity-probe" "ovn-nbctl --no-leader-only -t 5 set-connection pssl:{{.OVN_NB_PORT}}{{.LISTEN_DUAL_STACK}} -- set connection . inactivity_probe={{.OVN_NB_INACTIVITY_PROBE}}"; then + exit 1 + fi + + # Upgrade the db if required. 
+ DB_SCHEMA="/usr/share/ovn/ovn-nb.ovsschema" + DB_SERVER="unix:/var/run/ovn/ovnnb_db.sock" + schema_name=$(ovsdb-tool schema-name $DB_SCHEMA) + db_version=$(ovsdb-client -t 10 get-schema-version "$DB_SERVER" "$schema_name") + target_version=$(ovsdb-tool schema-version "$DB_SCHEMA") + + if ovsdb-tool compare-versions "$db_version" == "$target_version"; then + : + elif ovsdb-tool compare-versions "$db_version" ">" "$target_version"; then + echo "Database $schema_name has newer schema version ($db_version) than our local schema ($target_version), possibly an upgrade is partially complete?" + else + echo "Upgrading database $schema_name from schema version $db_version to $target_version" + ovsdb-client -t 30 convert "$DB_SERVER" "$DB_SCHEMA" + fi + fi + + # read the current northd_probe_interval from the DB + OVN_NB_CTL="ovn-nbctl -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt --db "{{.OVN_NB_DB_LIST}}"" + northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-10000} + echo "Setting northd probe interval to ${northd_probe_interval} ms" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 20 ]]; do + current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval) + if [[ $? == 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + # ensure the northd_probe_interval is set to the configured value + if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then + if ! retry 20 "northd-probe" "${OVN_NB_CTL} set NB_GLOBAL . options:northd_probe_interval=${northd_probe_interval}"; then + exit 1 + fi + fi + + # Enable/disable IPsec + {{ if .OVNIPsecEnable }} + ipsec=true + {{ else }} + ipsec=false + {{ end }} + if ! retry 20 "ipsec" "${OVN_NB_CTL} set nb_global . 
ipsec=${ipsec}"; then + exit 1 + fi + preStop: + exec: + command: + - /bin/bash + - -c + - | + echo "$(date -Iseconds) - stopping nbdb" + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb + echo "$(date -Iseconds) - nbdb stopped" + rm -f /var/run/ovn/ovnnb_db.pid + readinessProbe: + initialDelaySeconds: 90 + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + + # exit early if this DB is not supposed to be part of the cluster + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + exit 0 + fi + + leader_status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=3 cluster/status OVN_Northbound 2>/dev/null | { grep "Leader: unknown" || true; }) + if [[ ! -z "${leader_status}" ]]; then + echo "NB DB Raft leader is unknown to the cluster node." + exit 1 + fi + # set trim-on-compaction + /usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=5 ovsdb-server/memory-trim-on-compaction on 2>/dev/null + env: + - name: OVN_LOG_LEVEL + value: info + - name: OVN_NORTHD_PROBE_INTERVAL + value: "{{.OVN_NORTHD_PROBE_INTERVAL}}" + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - mountPath: /etc/ovn + name: datadir + - mountPath: /var/run/ovn + name: datadir + - mountPath: /var/log/ovn + name: datadir + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 30Mi + ports: + - name: nb-db-port + containerPort: {{.OVN_NB_PORT}} + - name: nb-db-raft-port + containerPort: {{.OVN_NB_RAFT_PORT}} + terminationMessagePolicy: FallbackToLogsOnError + + # sbdb: The southbound, or 
flow DB. In raft mode + - name: sbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xm + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + # initialize variables + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt" + transport="ssl" + ovn_raft_conn_ip_url_suffix="" + if [[ "${K8S_POD_IP}" == *":"* ]]; then + ovn_raft_conn_ip_url_suffix=":[::]" + fi + db="sb" + db_port="{{.OVN_SB_PORT}}" + ovn_db_file="/etc/ovn/ovn${db}_db.db" + # checks if a db pod is part of a current cluster + db_part_of_cluster() { + local pod_index=${1} + local db=${2} + local port=${3} + echo "Checking if ovnkube-master-${pod_index} is part of cluster" + init_ip="ovnkube-master-${pod_index}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + echo "ovnkube-master-${pod_index} ip: $init_ip" + target=$(ovn-${db}ctl --timeout=5 --db=${transport}:${init_ip}:${port} ${ovndb_ctl_ssl_opts} \ + --data=bare --no-headings --columns=target list connection || true) + if [[ "${target}" != "p${transport}:${port}${ovn_raft_conn_ip_url_suffix}" ]]; then + echo "Unable to check correct target ${target} " + return 1 + fi + echo "ovnkube-master-${pod_index} is part of cluster" + return 0 + } + # end of db_part_of_cluster + + # Checks if cluster has already been initialized. 
+ # If not it returns false and sets init_ip to CLUSTER_INITIATOR_IP + cluster_exists() { + local db=${1} + local port=${2} + for ((i=0; i<{{.OvnkubeMasterReplicas}}; i++ )); do + if db_part_of_cluster $i $db $port; then + echo "ovnkube-master-${i} is part of current cluster with ip: ${init_ip}!" + return 0 + fi + done + # if we get here there is no cluster, set init_ip and get out + init_ip=$CLUSTER_INITIATOR_IP + return 1 + } + # end of cluster_exists() + + # RAFT clusters need an odd number of members to achieve consensus. + # The CNO determines which members make up the cluster, so if this container + # is not supposed to be part of the cluster, wait forever doing nothing + # (instead of exiting and causing CrashLoopBackoffs for no reason). + if [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + echo "$(date -Iseconds) - not selected as RAFT member; sleeping..." + sleep 1500d + exit 0 + fi + + OVN_ARGS="--db-sb-cluster-local-port={{.OVN_SB_RAFT_PORT}} \ + --db-sb-cluster-local-addr="${pod_dns_name}" \ + --no-monitor \ + --db-sb-cluster-local-proto=ssl \ + --ovn-sb-db-ssl-key=/ovn-cert/tls.key \ + --ovn-sb-db-ssl-cert=/ovn-cert/tls.crt \ + --ovn-sb-db-ssl-ca-cert=/ovn-ca/ca-bundle.crt" + + CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" + echo "$(date -Iseconds) - starting sbdb CLUSTER_INITIATOR_IP=${CLUSTER_INITIATOR_IP}" + initialize="false" + + if [[ ! -e ${ovn_db_file} ]]; then + initialize="true" + fi + + if [[ "${initialize}" == "true" ]]; then + # check to see if a cluster already exists. If it does, just join it. 
+ counter=0 + cluster_found=false + while [ $counter -lt 5 ]; do + if cluster_exists ${db} ${db_port}; then + cluster_found=true + break + fi + sleep 1 + counter=$((counter+1)) + done + + if ${cluster_found}; then + echo "Cluster already exists for DB: ${db}" + # join existing cluster + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --db-sb-cluster-remote-port={{.OVN_SB_RAFT_PORT}} \ + --db-sb-cluster-remote-addr=${init_ip} \ + --db-sb-cluster-remote-proto=ssl \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_sb_ovsdb & + + wait $! + else + # either we need to initialize a new cluster or wait for master to create it + if [[ "${pod_dns_name}" == "${CLUSTER_INITIATOR_IP}" ]]; then + # set DB election timer at DB creation time if OVN supports it + election_timer= + if test -n "$(/usr/share/ovn/scripts/ovn-ctl --help 2>&1 | grep "\--db-sb-election-timer")"; then + election_timer="--db-sb-election-timer=$(({{.OVN_SB_RAFT_ELECTION_TIMER}}*1000))" + fi + + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + ${election_timer} \ + run_sb_ovsdb & + + wait $! + else + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --db-sb-cluster-remote-port={{.OVN_SB_RAFT_PORT}} \ + --db-sb-cluster-remote-addr=${init_ip} \ + --db-sb-cluster-remote-proto=ssl \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_sb_ovsdb & + + wait $! + fi + fi + else + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_sb_ovsdb & + + wait $! 
+ fi + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" + rm -f /var/run/ovn/ovnsb_db.pid + + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + + # exit early if this DB is not supposed to be part of the cluster + if [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + exit 0 + fi + + # retry an operation a number of times, sleeping 2 seconds between each try + retry() { + local tries=${1} + local desc=${2} + local cmd=${3} + + local retries=0 + while ! ${cmd}; do + (( retries += 1 )) + if [[ "${retries}" -gt ${tries} ]]; then + echo "$(date -Iseconds) - ERROR - nbdb ${desc} - too many failed attempts, giving up" + return 1 + fi + echo "$(date -Iseconds) - WARN - nbdb ${desc} - failed try ${retries}, retrying..." + sleep 2 + done + echo "$(date -Iseconds) - INFO - nbdb ${desc} - success" + return 0 + } + + if [[ "${pod_dns_name}" == "${CLUSTER_INITIATOR_IP}" ]]; then + echo "$(date -Iseconds) - sdb - postStart - waiting for master to be selected" + + # set the connection and inactivity probe + if ! retry 60 "inactivity-probe" "ovn-sbctl --no-leader-only -t 5 set-connection pssl:{{.OVN_SB_PORT}}{{.LISTEN_DUAL_STACK}} -- set connection . inactivity_probe={{.OVN_CONTROLLER_INACTIVITY_PROBE}}"; then + exit 1 + fi + + # Upgrade the db if required. 
+ DB_SCHEMA="/usr/share/ovn/ovn-sb.ovsschema" + DB_SERVER="unix:/var/run/ovn/ovnsb_db.sock" + schema_name=$(ovsdb-tool schema-name $DB_SCHEMA) + db_version=$(ovsdb-client -t 10 get-schema-version "$DB_SERVER" "$schema_name") + target_version=$(ovsdb-tool schema-version "$DB_SCHEMA") + + if ovsdb-tool compare-versions "$db_version" == "$target_version"; then + : + elif ovsdb-tool compare-versions "$db_version" ">" "$target_version"; then + echo "Database $schema_name has newer schema version ($db_version) than our local schema ($target_version), possibly an upgrade is partially complete?" + else + echo "Upgrading database $schema_name from schema version $db_version to $target_version" + ovsdb-client -t 30 convert "$DB_SERVER" "$DB_SCHEMA" + fi + fi + + # Kill some time while the cluster converges by checking IPsec status + OVN_SB_CTL="ovn-sbctl -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt --db "{{.OVN_SB_DB_LIST}}"" + if ! retry 20 "ipsec" "${OVN_SB_CTL} get sb_global . ipsec"; then + exit 1 + fi + preStop: + exec: + command: + - /bin/bash + - -c + - | + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + readinessProbe: + initialDelaySeconds: 90 + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + + # exit early if this DB is not supposed to be part of the cluster + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + if [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + exit 0 + fi + + leader_status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=3 cluster/status OVN_Southbound 2>/dev/null | { grep "Leader: unknown" || true; }) + if [[ ! -z "${leader_status}" ]]; then + echo "SB DB Raft leader is unknown to the cluster node." 
+ exit 1 + fi + # set trim-on-compaction + /usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=5 ovsdb-server/memory-trim-on-compaction on 2>/dev/null + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - mountPath: /etc/ovn + name: datadir + - mountPath: /var/run/ovn + name: datadir + - mountPath: /var/log/ovn + name: datadir + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + ports: + - name: sb-db-port + containerPort: {{.OVN_SB_PORT}} + - name: sb-db-raft-port + containerPort: {{.OVN_SB_RAFT_PORT}} + resources: + requests: + cpu: 10m + memory: 130Mi + terminationMessagePolicy: FallbackToLogsOnError + + # ovnkube master: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-master + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_master" ]]; then + set -o allexport + source "/env/_master" + set +o allexport + fi + + # TLS for ovnkube-master metrics + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." + exit 1 + fi + + retries=0 + while [ ! 
-f /var/run/secrets/hosted_cluster/token ]; do + (( retries += 1 )) + sleep 1 + if [[ "${retries}" -gt 30 ]]; then + echo "$(date -Iseconds) - Hosted cluster token not found" + exit 1 + fi + done + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + multi_network_policy_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then + multi_network_policy_enabled_flag="--enable-multi-networkpolicy" + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-master - start ovnkube --init-master ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --enable-interconnect \ + --zone global \ + --init-master "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --k8s-token-file=/var/run/secrets/hosted_cluster/token \ + --ovn-empty-lb-events \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --metrics-bind-address "0.0.0.0:9102" \ + --metrics-enable-pprof \ + --metrics-enable-config-duration \ + ${gateway_mode_flags} \ + --sb-address "{{.OVN_SB_DB_LIST}}" \ + --sb-client-privkey /ovn-cert/tls.key \ + --sb-client-cert /ovn-cert/tls.crt \ + --sb-client-cacert /ovn-ca/ca-bundle.crt \ + --sb-cert-common-name "{{.OVN_CERT_CN}}" \ + --nb-address "{{.OVN_NB_DB_LIST}}" \ + --nb-client-privkey /ovn-cert/tls.key \ + --nb-client-cert /ovn-cert/tls.crt \ + --nb-client-cacert /ovn-ca/ca-bundle.crt \ + --nb-cert-common-name "{{.OVN_CERT_CN}}" \ + --enable-multicast \ + --disable-snat-multiple-gws \ + --node-server-privkey ${TLS_PK} \ + --node-server-cert ${TLS_CERT} \ + ${multi_network_enabled_flag} \ + ${multi_network_policy_enabled_flag} \ + --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" + volumeMounts: + - mountPath: /etc/ovn + name: datadir + - mountPath: /var/run/ovn + name: datadir + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + - 
mountPath: /hosted-ca + name: hosted-ca-cert + - mountPath: /var/run/secrets/hosted_cluster + name: hosted-cluster-api-access + - name: ovn-master-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + resources: + requests: + cpu: 10m + memory: 200Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: ALL_PROXY + value: socks5://127.0.0.1:8090 + - name: NO_PROXY + value: kube-apiserver + ports: + - name: metrics-port + containerPort: 29102 + terminationMessagePolicy: FallbackToLogsOnError + # ovn-dbchecker: monitor clustered ovn databases for db health and stale raft members + - name: ovn-dbchecker + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_master" ]]; then + set -o allexport + source "/env/_master" + set +o allexport + fi + + retries=0 + while [ ! -f /var/run/secrets/hosted_cluster/token ]; do + (( retries += 1 )) + sleep 1 + if [[ "${retries}" -gt 30 ]]; then + echo "$(date -Iseconds) - Hosted cluster token not found" + exit 1 + fi + done + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovn-dbchecker - start ovn-dbchecker" + + # RAFT clusters need an odd number of members to achieve consensus. + # The CNO determines which members make up the cluster, so if this container + # is not supposed to be part of the cluster, wait forever doing nothing + # (instead of exiting and causing CrashLoopBackoffs for no reason). + pod_dns_name="${K8S_POD_NAME}.ovnkube-master-internal.{{.HostedClusterNamespace}}.svc.cluster.local" + if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${pod_dns_name}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${pod_dns_name}]:".* ]]; then + echo "$(date -Iseconds) - not selected as RAFT member; sleeping..." 
+ sleep 1500d + exit 0 + fi + + exec /usr/bin/ovndbchecker \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --k8s-token-file=/var/run/secrets/hosted_cluster/token \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --sb-address "{{.OVN_SB_DB_LIST}}" \ + --sb-client-privkey /ovn-cert/tls.key \ + --sb-client-cert /ovn-cert/tls.crt \ + --sb-client-cacert /ovn-ca/ca-bundle.crt \ + --sb-cert-common-name "{{.OVN_CERT_CN}}" \ + --sb-raft-election-timer "{{.OVN_SB_RAFT_ELECTION_TIMER}}" \ + --nb-address "{{.OVN_NB_DB_LIST}}" \ + --nb-client-privkey /ovn-cert/tls.key \ + --nb-client-cert /ovn-cert/tls.crt \ + --nb-client-cacert /ovn-ca/ca-bundle.crt \ + --nb-cert-common-name "{{.OVN_CERT_CN}}" \ + --nb-raft-election-timer "{{.OVN_NB_RAFT_ELECTION_TIMER}}" + volumeMounts: + - mountPath: /etc/ovn + name: datadir + - mountPath: /var/run/ovn + name: datadir + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + - mountPath: /hosted-ca + name: hosted-ca-cert + - mountPath: /var/run/secrets/hosted_cluster + name: hosted-cluster-api-access + resources: + requests: + cpu: 10m + memory: 50Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + terminationMessagePolicy: FallbackToLogsOnError + - name: socks-proxy + image: "{{.Socks5ProxyImage}}" + command: + - /usr/bin/control-plane-operator + - konnectivity-socks5-proxy + args: + - "run" + volumeMounts: + - mountPath: /etc/kubernetes/ + name: admin-kubeconfig + - mountPath: /etc/konnectivity/proxy-client + name: konnectivity-proxy-cert + readOnly: true + - mountPath: /etc/konnectivity/proxy-ca + name: konnectivity-proxy-ca + readOnly: true + resources: + requests: + cpu: 10m + memory: 10Mi + env: + - name: KUBECONFIG + value: "/etc/kubernetes/kubeconfig" + terminationMessagePolicy: FallbackToLogsOnError + {{ if .HCPNodeSelector }} + 
nodeSelector: + {{ range $key, $value := .HCPNodeSelector }} + "{{$key}}": "{{$value}}" + {{ end }} + {{ end }} + volumes: + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: konnectivity-proxy-ca + configMap: + name: konnectivity-ca-bundle + - name: konnectivity-proxy-cert + secret: + defaultMode: 0640 + secretName: konnectivity-client + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + - name: admin-kubeconfig + secret: + secretName: service-network-admin-kubeconfig + - name: hosted-cluster-api-access + emptyDir: {} + - name: hosted-ca-cert + secret: + secretName: root-ca + items: + - key: ca.crt + path: ca.crt + tolerations: + - key: "hypershift.openshift.io/control-plane" + operator: "Equal" + value: "true" + effect: "NoSchedule" + - key: "hypershift.openshift.io/cluster" + operator: "Equal" + value: {{.HostedClusterNamespace}} + effect: "NoSchedule" diff --git a/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-node.yaml new file mode 100644 index 0000000000..6268d00d13 --- /dev/null +++ b/bindata/network/ovn-kubernetes/managed/single-zone-interconnect/ovnkube-node.yaml @@ -0,0 +1,763 @@ +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + {{ if eq .OVN_NODE_MODE "dpu-host" }} + name: ovnkube-node-dpu-host + {{ else if eq .OVN_NODE_MODE "smart-nic" }} + name: ovnkube-node-smart-nic + {{ else }} + name: ovnkube-node + {{ end }} + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes per node networking components. 
+ release.openshift.io/version: "{{.ReleaseVersion}}" +spec: + selector: + matchLabels: + {{ if eq .OVN_NODE_MODE "dpu-host" }} + app: ovnkube-node-dpu-host + {{ else if eq .OVN_NODE_MODE "smart-nic" }} + app: ovnkube-node-smart-nic + {{ else }} + app: ovnkube-node + {{ end }} + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 10% + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + {{ if eq .OVN_NODE_MODE "dpu-host" }} + app: ovnkube-node-dpu-host + {{ else if eq .OVN_NODE_MODE "smart-nic" }} + app: ovnkube-node-smart-nic + {{ else }} + app: ovnkube-node + {{ end }} + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + {{ if .DpuHostModeLabel }} + - key: {{ .DpuHostModeLabel }} + {{ if eq .OVN_NODE_MODE "dpu-host" }} + operator: Exists + {{ else if eq .OVN_NODE_MODE "smart-nic" }} + operator: DoesNotExist + {{ else }} + operator: DoesNotExist + {{ end }} + {{ end }} + {{ if .SmartNicModeLabel }} + - key: {{ .SmartNicModeLabel }} + {{ if eq .OVN_NODE_MODE "dpu-host" }} + operator: DoesNotExist + {{ else if eq .OVN_NODE_MODE "smart-nic" }} + operator: Exists + {{ else }} + operator: DoesNotExist + {{ end }} + {{ end }} + {{ if .DpuModeLabel }} + - key: {{ .DpuModeLabel }} + operator: DoesNotExist + {{ end }} + serviceAccountName: ovn-kubernetes-node + hostNetwork: true + dnsPolicy: Default + hostPID: true + priorityClassName: "system-node-critical" + # When proxy is enabled in hypershift, ovn sbdb requests are redirected + # to 127.0.0.1 with hostAlias settings. Ovnkube-node-proxy container + # listens on 127.0.0.1 and connects to destination ovn sbdb route or + # node port service through http proxy server. 
+ {{if .ENABLE_OVN_NODE_PROXY}} + hostAliases: + - hostnames: + - {{.OVN_SB_DB_ROUTE_HOST}} + ip: 127.0.0.1 + {{end}} + initContainers: + # ovnkube-node-init: wait for sbdb ready + {{ if or (eq .OVN_NODE_MODE "full") (eq .OVN_NODE_MODE "smart-nic") }} + - name: ovnkube-node-init + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + echo "$(date -Iseconds) - checking sbdb" + + {{if .ENABLE_OVN_NODE_PROXY}} + # Don't use /etc/hosts to resolve OVN_SB_DB_ROUTE_HOST which points to 127.0.0.1 (see spec.hostAliases) + echo "hosts: dns files" >> /etc/nsswitch.conf + exec socat TCP-LISTEN:{{.OVN_SB_DB_ROUTE_LOCAL_PORT}},reuseaddr,fork PROXY:{{.HTTP_PROXY_IP}}:{{.OVN_SB_DB_ROUTE_HOST}}:{{.OVN_SB_DB_ROUTE_PORT}},proxyport={{.HTTP_PROXY_PORT}} & + proxypid=$! + {{end}} + + ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt" + sbdb_ip="{{.OVN_SB_DB_ENDPOINT}}" + retries=0 + while ! 
ovn-sbctl --no-leader-only --timeout=5 --db=${sbdb_ip} ${ovndb_ctl_ssl_opts} get-connection; do + (( retries += 1 )) + if [[ "${retries}" -gt 40 ]]; then + echo "$(date -Iseconds) - ERROR RESTARTING - sbdb - too many failed ovn-sbctl attempts, giving up" + exit 1 + fi + sleep 2 + done + + {{if .ENABLE_OVN_NODE_PROXY}} + kill $proxypid + {{end}} + volumeMounts: + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + {{ end }} + + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /etc/openvswitch - ovsdb system id + # /var/lib/openvswitch -> /var/lib/openvswitch/data - ovsdb data + # /run/openvswitch -> tmpfs - ovsdb sockets + # /env -> configmap env-overrides - debug overrides + containers: + {{ if or (eq .OVN_NODE_MODE "full") (eq .OVN_NODE_MODE "smart-nic") }} + {{if .ENABLE_OVN_NODE_PROXY}} + # ovnkube-node-proxy redirects ovn sbdb traffic to http proxy + - name: ovnkube-node-proxy + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + echo "$(date -Iseconds) - starting ovnkube-node-proxy" + # Don't use /etc/hosts to resolve OVN_SB_DB_ROUTE_HOST which points to 127.0.0.1 (see spec.hostAliases) + echo "hosts: dns files" >> /etc/nsswitch.conf + exec socat TCP-LISTEN:{{.OVN_SB_DB_ROUTE_LOCAL_PORT}},reuseaddr,fork PROXY:{{.HTTP_PROXY_IP}}:{{.OVN_SB_DB_ROUTE_HOST}}:{{.OVN_SB_DB_ROUTE_PORT}},proxyport={{.HTTP_PROXY_PORT}} + env: + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /env + name: env-overrides + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 20Mi + {{end}} + # ovn-controller: programs the vswitch with flows from the sbdb + - name: ovn-controller + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -e + if [[ -f 
"/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + + echo "$(date -Iseconds) - starting ovn-controller" + exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \ + --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \ + --syslog-method="{{.OVNPolicyAuditDestination}}" \ + --log-file=/var/log/ovn/acl-audit-log.log \ + -vFACILITY:"{{.OVNPolicyAuditSyslogFacility}}" \ + -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \ + -vconsole:"${OVN_LOG_LEVEL}" -vconsole:"acl_log:off" \ + -vPATTERN:console:"{{.OVN_LOG_PATTERN_CONSOLE}}" \ + -vsyslog:"acl_log:info" \ + -vfile:"acl_log:info" + securityContext: + privileged: true + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + - mountPath: /var/log/ovn + name: node-log + - mountPath: /dev/log + name: log-socket + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 300Mi + - name: ovn-acl-logging + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -euo pipefail + + # Rotate audit log files when they get to max size (in bytes) + MAXFILESIZE=$(( "{{.OVNPolicyAuditMaxFileSize}}"*1000000 )) + LOGFILE=/var/log/ovn/acl-audit-log.log + CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) + + # Redirect err to null so no messages are shown upon rotation + tail -F ${LOGFILE} 2> /dev/null & + + while true + do + # Make sure ovn-controller's logfile exists, and get current size in bytes + if [ -f "$LOGFILE" ]; then + file_size=`du -b ${LOGFILE} | 
tr -s '\t' ' ' | cut -d' ' -f1` + else + ovs-appctl -t /var/run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen + file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` + fi + + if [ $file_size -gt $MAXFILESIZE ];then + echo "Rotating OVN ACL Log File" + timestamp=`date '+%Y-%m-%dT%H-%M-%S'` + mv ${LOGFILE} /var/log/ovn/acl-audit-log.$timestamp.log + ovs-appctl -t /run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen + CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) + fi + + # sleep for 30 seconds to avoid wasting CPU + sleep 30 + done + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/log/ovn + name: node-log + - mountPath: /run/ovn/ + name: run-ovn + {{ end }} + - name: kube-rbac-proxy + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. + # If it isn't created there is probably an issue so we want to crashloop. + retries=0 + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! 
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9103 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29103/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9103 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-node-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + - name: kube-rbac-proxy-ovn-metrics + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. + # If it isn't created there is probably an issue so we want to crashloop. + retries=0 + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! 
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9105 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29105/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9105 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-node-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + # ovnkube-node: does node-level bookkeeping and configuration + - name: ovnkube-node + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db_ip addresses" + cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/ + ovn_config_namespace=openshift-ovn-kubernetes + echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" + iptables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK + iptables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + ip6tables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK + ip6tables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + retries=0 + + echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node db_ip ${db_ip}" + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". 
Must be \"local\" or \"shared\"." + exit 1 + fi + + export_network_flows_flags= + if [[ -n "${NETFLOW_COLLECTORS}" ]] ; then + export_network_flows_flags="--netflow-targets ${NETFLOW_COLLECTORS}" + fi + if [[ -n "${SFLOW_COLLECTORS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --sflow-targets ${SFLOW_COLLECTORS}" + fi + if [[ -n "${IPFIX_COLLECTORS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-targets ${IPFIX_COLLECTORS}" + fi + if [[ -n "${IPFIX_CACHE_MAX_FLOWS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-cache-max-flows ${IPFIX_CACHE_MAX_FLOWS}" + fi + if [[ -n "${IPFIX_CACHE_ACTIVE_TIMEOUT}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-cache-active-timeout ${IPFIX_CACHE_ACTIVE_TIMEOUT}" + fi + if [[ -n "${IPFIX_SAMPLING}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-sampling ${IPFIX_SAMPLING}" + fi + gw_interface_flag= + # if br-ex1 is configured on the node, we want to use it for external gateway traffic + if [ -d /sys/class/net/br-ex1 ]; then + gw_interface_flag="--exgw-interface=br-ex1" + fi + + node_mgmt_port_netdev_flags= + if [[ -n "${OVNKUBE_NODE_MGMT_PORT_NETDEV}" ]] ; then + node_mgmt_port_netdev_flags="--ovnkube-node-mgmt-port-netdev ${OVNKUBE_NODE_MGMT_PORT_NETDEV}" + fi + if [[ -n "${OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME}" ]] ; then + node_mgmt_port_netdev_flags="$node_mgmt_port_netdev_flags --ovnkube-node-mgmt-port-dp-resource-name ${OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME}" + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + multi_network_policy_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then + multi_network_policy_enabled_flag="--enable-multi-networkpolicy" + fi + + exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \ + --enable-interconnect \ + --zone global \ + 
--nb-address "{{.OVN_NB_DB_ENDPOINT}}" \ + --sb-address "{{.OVN_SB_DB_ENDPOINT}}" \ + --nb-client-privkey /ovn-cert/tls.key \ + --nb-client-cert /ovn-cert/tls.crt \ + --nb-client-cacert /ovn-ca/ca-bundle.crt \ + --nb-cert-common-name "{{.OVN_CERT_CN}}" \ + --sb-client-privkey /ovn-cert/tls.key \ + --sb-client-cert /ovn-cert/tls.crt \ + --sb-client-cacert /ovn-ca/ca-bundle.crt \ + --sb-cert-common-name "{{.OVN_CERT_CN}}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \ + ${gateway_mode_flags} \ + ${node_mgmt_port_netdev_flags} \ + {{- if eq .OVN_NODE_MODE "dpu-host" }} + --ovnkube-node-mode dpu-host \ + {{- end }} + --metrics-bind-address "127.0.0.1:29103" \ + --ovn-metrics-bind-address "127.0.0.1:29105" \ + --metrics-enable-pprof \ + --export-ovs-metrics \ + --disable-snat-multiple-gws \ + ${export_network_flows_flags} \ + ${multi_network_enabled_flag} \ + ${multi_network_policy_enabled_flag} \ + ${gw_interface_flag} + env: + # for kubectl + - name: KUBERNETES_SERVICE_PORT + value: "{{.KUBERNETES_SERVICE_PORT}}" + - name: KUBERNETES_SERVICE_HOST + value: "{{.KUBERNETES_SERVICE_HOST}}" + - name: OVN_CONTROLLER_INACTIVITY_PROBE + value: "{{.OVN_CONTROLLER_INACTIVITY_PROBE}}" + - name: OVN_KUBE_LOG_LEVEL + value: "4" +{{ if .HTTP_PROXY }} + - name: "HTTP_PROXY" + value: "{{ .HTTP_PROXY}}" +{{ end }} +{{ if .HTTPS_PROXY }} + - name: "HTTPS_PROXY" + value: "{{ .HTTPS_PROXY}}" +{{ end }} +{{ if .NO_PROXY }} + - name: "NO_PROXY" + value: "{{ .NO_PROXY}}" +{{ end }} + {{ if .NetFlowCollectors }} + - name: NETFLOW_COLLECTORS + value: "{{.NetFlowCollectors}}" + {{ end }} + {{ if .SFlowCollectors }} + - name: SFLOW_COLLECTORS + value: "{{.SFlowCollectors}}" + {{ end }} + {{ if .IPFIXCollectors }} + - name: IPFIX_COLLECTORS + value: "{{.IPFIXCollectors}}" + {{ end }} + {{ if .IPFIXCacheMaxFlows }} + - name: IPFIX_CACHE_MAX_FLOWS + value: "{{.IPFIXCacheMaxFlows}}" + {{ end }} 
+ {{ if .IPFIXCacheActiveTimeout }} + - name: IPFIX_CACHE_ACTIVE_TIMEOUT + value: "{{.IPFIXCacheActiveTimeout}}" + {{ end }} + {{ if .IPFIXSampling }} + - name: IPFIX_SAMPLING + value: "{{.IPFIXSampling}}" + {{ end }} + {{ if and (.MgmtPortResourceName) (or (eq .OVN_NODE_MODE "smart-nic") (eq .OVN_NODE_MODE "dpu-host")) }} + - name: OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME + value: {{ .MgmtPortResourceName }} + {{ end }} + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + ports: + - name: metrics-port + containerPort: 29103 + securityContext: + privileged: true + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + # for the CNI server socket + - mountPath: /run/ovn-kubernetes/ + name: host-run-ovn-kubernetes + # accessing bind-mounted net namespaces + - mountPath: /run/netns + name: host-run-netns + readOnly: true + mountPropagation: HostToContainer + # for installing the CNI plugin binary + - mountPath: /cni-bin-dir + name: host-cni-bin + # for installing the CNI configuration file + - mountPath: /etc/cni/net.d + name: host-cni-netd + # Where we store IP allocations + - mountPath: /var/lib/cni/networks/ovn-k8s-cni-overlay + name: host-var-lib-cni-networks-ovn-kubernetes + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + 
resources: + requests: + cpu: 10m + memory: 300Mi + {{ if and (.MgmtPortResourceName) (or (eq .OVN_NODE_MODE "smart-nic") (eq .OVN_NODE_MODE "dpu-host")) }} + {{ .MgmtPortResourceName }}: '1' + {{ end }} + {{ if and (.MgmtPortResourceName) (or (eq .OVN_NODE_MODE "smart-nic") (eq .OVN_NODE_MODE "dpu-host")) }} + limits: + {{ .MgmtPortResourceName }}: '1' + {{ end }} + lifecycle: + preStop: + exec: + command: ["rm","-f","/etc/cni/net.d/10-ovn-kubernetes.conf"] + readinessProbe: + exec: + command: ["test", "-f", "/etc/cni/net.d/10-ovn-kubernetes.conf"] + initialDelaySeconds: 5 + periodSeconds: 30 + {{- if .OVNPlatformAzure}} + - name: drop-icmp + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + + touch /var/run/ovn/add_iptables.sh + chmod 0755 /var/run/ovn/add_iptables.sh + cat <<'EOF' > /var/run/ovn/add_iptables.sh + #!/bin/sh + if [ -z "$3" ] + then + echo "Called with host address missing, ignore" + exit 0 + fi + echo "Adding ICMP drop rule for '$3' " + if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION + then + echo "iptables already set for $3" + else + iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION + fi + EOF + + echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" + iptables -X CHECK_ICMP_SOURCE || true + iptables -N CHECK_ICMP_SOURCE || true + iptables -F CHECK_ICMP_SOURCE + iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE || true + iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE + iptables -N ICMP_ACTION || true + iptables -F ICMP_ACTION + iptables -A ICMP_ACTION -j LOG + iptables -A ICMP_ACTION -j DROP + # + ip addr show + ip route show + iptables -nvL + iptables -nvL -t nat + oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ 
.status.hostIP }' -- /var/run/ovn/add_iptables.sh + lifecycle: + preStop: + exec: + command: ["/bin/bash", "-c", "echo drop-icmp done"] + securityContext: + privileged: true + volumeMounts: + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + - mountPath: /run/ovn/ + name: run-ovn + resources: + requests: + cpu: 5m + memory: 20Mi + env: + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + {{- end}} + nodeSelector: + beta.kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + # used for iptables wrapper scripts + - name: host-slash + hostPath: + path: / + - name: host-run-netns + hostPath: + path: /run/netns + - name: var-lib-openvswitch + hostPath: + path: /var/lib/openvswitch/data + - name: etc-openvswitch + hostPath: + path: /etc/openvswitch + - name: run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn + {{ if or (eq .OVN_NODE_MODE "full") (eq .OVN_NODE_MODE "smart-nic") }} + # Used for placement of ACL audit logs + - name: node-log + hostPath: + path: /var/log/ovn + - name: log-socket + hostPath: + path: /dev/log + {{ end }} + # For CNI server + - name: host-run-ovn-kubernetes + hostPath: + path: /run/ovn-kubernetes + - name: host-cni-bin + hostPath: + path: "{{.CNIBinDir}}" + - name: host-cni-netd + hostPath: + path: "{{.CNIConfDir}}" + - name: host-var-lib-cni-networks-ovn-kubernetes + hostPath: + path: /var/lib/cni/networks/ovn-k8s-cni-overlay + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-node-metrics-cert + secret: + secretName: ovn-node-metrics-cert + optional: true + tolerations: + - operator: "Exists" diff --git 
a/bindata/network/ovn-kubernetes/self-hosted/004-config.yaml b/bindata/network/ovn-kubernetes/self-hosted/common/004-config.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/self-hosted/004-config.yaml rename to bindata/network/ovn-kubernetes/self-hosted/common/004-config.yaml diff --git a/bindata/network/ovn-kubernetes/self-hosted/007-flowschema.yaml b/bindata/network/ovn-kubernetes/self-hosted/common/007-flowschema.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/self-hosted/007-flowschema.yaml rename to bindata/network/ovn-kubernetes/self-hosted/common/007-flowschema.yaml diff --git a/bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml b/bindata/network/ovn-kubernetes/self-hosted/common/alert-rules-control-plane.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml rename to bindata/network/ovn-kubernetes/self-hosted/common/alert-rules-control-plane.yaml diff --git a/bindata/network/ovn-kubernetes/self-hosted/monitor-master.yaml b/bindata/network/ovn-kubernetes/self-hosted/common/monitor-master.yaml similarity index 91% rename from bindata/network/ovn-kubernetes/self-hosted/monitor-master.yaml rename to bindata/network/ovn-kubernetes/self-hosted/common/monitor-master.yaml index 28e43c6f17..b6dc5b344a 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/monitor-master.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/common/monitor-master.yaml @@ -29,7 +29,7 @@ apiVersion: v1 kind: Service metadata: labels: - app: ovnkube-master + app: ovnkube-master ### TODO no longer relevant name: ovn-kubernetes-master namespace: openshift-ovn-kubernetes annotations: @@ -43,6 +43,6 @@ spec: - name: metrics port: 9102 protocol: TCP - targetPort: 9102 + targetPort: 9102 ### TODO it's now 9112 sessionAffinity: None type: ClusterIP diff --git a/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-control-plane.yaml 
b/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-control-plane.yaml new file mode 100644 index 0000000000..d3bd49a64b --- /dev/null +++ b/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-control-plane.yaml @@ -0,0 +1,222 @@ +# The ovnkube control-plane components +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-master + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes controller (master) networking components. + release.openshift.io/version: "{{.ReleaseVersion}}" +spec: + selector: + matchLabels: + app: ovnkube-master + updateStrategy: + type: RollingUpdate + rollingUpdate: + # by default, Deployments spin up the new pod before terminating the old one + # but we don't want that - because ovsdb holds the lock. + maxSurge: 0 + maxUnavailable: 3 + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-master + ovn-db-pod: "true" + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + serviceAccountName: ovn-kubernetes-controller + hostNetwork: true + dnsPolicy: Default + priorityClassName: "system-cluster-critical" + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /var/lib/ovn/etc - ovsdb data + # /var/lib/openvswitch -> /var/lib/ovn/data - ovsdb pki state + # /run/openvswitch -> tmpfs - sockets + # /env -> configmap env-overrides - debug overrides + containers: + - name: kube-rbac-proxy + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. 
+          TS=$(date +%s)
+          WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
+          HAS_LOGGED_INFO=0
+
+          log_missing_certs(){
+            CUR_TS=$(date +%s)
+            if [[ "${CUR_TS}" -gt "${WARN_TS}" ]]; then
+              echo $(date -Iseconds) WARN: ovn-master-metrics-cert not mounted after 20 minutes.
+            elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
+              echo $(date -Iseconds) INFO: ovn-master-metrics-cert not mounted. Waiting 20 minutes.
+              HAS_LOGGED_INFO=1
+            fi
+          }
+          while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do
+            log_missing_certs
+            sleep 5
+          done
+
+          echo $(date -Iseconds) INFO: ovn-master-metrics-certs mounted, starting kube-rbac-proxy
+          exec /usr/bin/kube-rbac-proxy \
+            --logtostderr \
+            --secure-listen-address=:9106 \
+            --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
+            --upstream=http://127.0.0.1:29104/ \
+            --tls-private-key-file=${TLS_PK} \
+            --tls-cert-file=${TLS_CERT}
+        ports:
+        - containerPort: 9106
+          name: https
+        resources:
+          requests:
+            cpu: 10m
+            memory: 20Mi
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - name: ovn-master-metrics-cert
+          mountPath: /etc/pki/tls/metrics-cert
+          readOnly: True
+
+      # ovnkube master: convert kubernetes objects in to nbdb logical network components
+      - name: ovnkube-control-plane
+        image: "{{.OvnImage}}"
+        command:
+        - /bin/bash
+        - -c
+        - |
+          set -xe
+          if [[ -f "/env/_master" ]]; then
+            set -o allexport
+            source "/env/_master"
+            set +o allexport
+          fi
+
+          if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then
+            gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex"
+          elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then
+            gateway_mode_flags="--gateway-mode local --gateway-interface br-ex"
+          else
+            echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"."
+ exit 1 + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-control-plane - start ovnkube --init-master ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-cluster-manager "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --ovn-empty-lb-events \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --metrics-bind-address "127.0.0.1:29104" \ + --metrics-enable-pprof \ + --metrics-enable-config-duration \ + ${gateway_mode_flags} \ + --enable-multicast \ + --disable-snat-multiple-gws \ + ${multi_network_enabled_flag} \ + --enable-interconnect \ + --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" + volumeMounts: + # for checking ovs-configuration service + # - mountPath: /etc/systemd/system + # name: systemd-units + # readOnly: true + # - mountPath: /etc/openvswitch/ + # name: etc-openvswitch + # - mountPath: /etc/ovn/ + # name: etc-openvswitch + # - mountPath: /var/lib/openvswitch/ + # name: var-lib-openvswitch + # - mountPath: /run/openvswitch/ + # name: run-openvswitch + # - mountPath: /run/ovn/ + # name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - name: metrics-port + containerPort: 29104 + terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + node-role.kubernetes.io/master: "" + beta.kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + - name: etc-openvswitch + hostPath: + path: /var/lib/ovn/etc + - name: var-lib-openvswitch + hostPath: + path: /var/lib/ovn/data + - name: 
run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + optional: true + tolerations: + - key: "node-role.kubernetes.io/master" + operator: "Exists" + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + - key: "node.kubernetes.io/network-unavailable" + operator: "Exists" diff --git a/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml new file mode 100644 index 0000000000..c3f88990a5 --- /dev/null +++ b/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/ovnkube-node.yaml @@ -0,0 +1,1044 @@ +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-node + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes per node networking components. 
+ release.openshift.io/version: "{{.ReleaseVersion}}" +spec: + selector: + matchLabels: + app: ovnkube-node + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 10% + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-node + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: network.operator.openshift.io/dpu-host + {{ if eq .OVN_NODE_MODE "dpu-host" }} + operator: Exists + {{ else }} + operator: DoesNotExist + {{ end }} + - key: network.operator.openshift.io/dpu + operator: DoesNotExist + serviceAccountName: ovn-kubernetes-controller + hostNetwork: true + dnsPolicy: Default + hostPID: true + priorityClassName: "system-node-critical" + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /etc/openvswitch - ovsdb system id + # /var/lib/openvswitch -> /var/lib/openvswitch/data - ovsdb data + # /run/openvswitch -> tmpfs - ovsdb sockets + # /env -> configmap env-overrides - debug overrides + containers: + {{ if eq .OVN_NODE_MODE "full" }} + # ovn-controller: programs the vswitch with flows from the sbdb + - name: ovn-controller + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -e + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + + echo "$(date -Iseconds) - starting ovn-controller" + exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \ + --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \ + --syslog-method="{{.OVNPolicyAuditDestination}}" \ + --log-file=/var/log/ovn/acl-audit-log.log \ + -vFACILITY:"{{.OVNPolicyAuditSyslogFacility}}" \ + -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \ + -vconsole:"${OVN_LOG_LEVEL}" 
-vconsole:"acl_log:off" \ + -vPATTERN:console:"{{.OVN_LOG_PATTERN_CONSOLE}}" \ + -vsyslog:"acl_log:info" \ + -vfile:"acl_log:info" + securityContext: + privileged: true + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + - mountPath: /var/log/ovn/ + name: node-log + - mountPath: /dev/log + name: log-socket + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 300Mi + - name: ovn-acl-logging + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -euo pipefail + + # Rotate audit log files when then get to max size (in bytes) + MAXFILESIZE=$(( "{{.OVNPolicyAuditMaxFileSize}}"*1000000 )) + LOGFILE=/var/log/ovn/acl-audit-log.log + CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) + + # Redirect err to null so no messages are shown upon rotation + tail -F ${LOGFILE} 2> /dev/null & + + while true + do + # Make sure ovn-controller's logfile exists, and get current size in bytes + if [ -f "$LOGFILE" ]; then + file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` + else + ovs-appctl -t /var/run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen + file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` + fi + + if [ $file_size -gt $MAXFILESIZE ];then + echo "Rotating OVN ACL Log File" + timestamp=`date '+%Y-%m-%dT%H-%M-%S'` + mv ${LOGFILE} /var/log/ovn/acl-audit-log.$timestamp.log + ovs-appctl -t /run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen + CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) + fi + + # sleep for 30 seconds to avoid 
wasting CPU
+            sleep 30
+          done
+        resources:
+          requests:
+            cpu: 10m
+            memory: 20Mi
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - mountPath: /var/log/ovn/
+          name: node-log
+        - mountPath: /run/ovn/
+          name: run-ovn
+      {{ end }}
+      - name: kube-rbac-proxy-node
+        image: {{.KubeRBACProxyImage}}
+        command:
+        - /bin/bash
+        - -c
+        - |
+          #!/bin/bash
+          set -euo pipefail
+          TLS_PK=/etc/pki/tls/metrics-cert/tls.key
+          TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
+          # As the secret mount is optional we must wait for the files to be present.
+          # The service is created in monitor.yaml and this is created in sdn.yaml.
+          # If it isn't created there is probably an issue so we want to crashloop.
+          retries=0
+          TS=$(date +%s)
+          WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
+          HAS_LOGGED_INFO=0
+
+          log_missing_certs(){
+            CUR_TS=$(date +%s)
+            if [[ "${CUR_TS}" -gt "${WARN_TS}" ]]; then
+              echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
+            elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
+              echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting 20 minutes.
+              HAS_LOGGED_INFO=1
+            fi
+          }
+          while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do
+            log_missing_certs
+            sleep 5
+          done
+
+          echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy
+          exec /usr/bin/kube-rbac-proxy \
+            --logtostderr \
+            --secure-listen-address=:9103 \
+            --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
+            --upstream=http://127.0.0.1:29103/ \
+            --tls-private-key-file=${TLS_PK} \
+            --tls-cert-file=${TLS_CERT}
+        ports:
+        - containerPort: 9103
+          name: https
+        resources:
+          requests:
+            cpu: 10m
+            memory: 20Mi
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - name: ovn-node-metrics-cert
+          mountPath: /etc/pki/tls/metrics-cert
+          readOnly: True
+      - name: kube-rbac-proxy-ovn-metrics
+        image: {{.KubeRBACProxyImage}}
+        command:
+        - /bin/bash
+        - -c
+        - |
+          #!/bin/bash
+          set -euo pipefail
+          TLS_PK=/etc/pki/tls/metrics-cert/tls.key
+          TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
+          # As the secret mount is optional we must wait for the files to be present.
+          # The service is created in monitor.yaml and this is created in sdn.yaml.
+          # If it isn't created there is probably an issue so we want to crashloop.
+          retries=0
+          TS=$(date +%s)
+          WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
+          HAS_LOGGED_INFO=0
+
+          log_missing_certs(){
+            CUR_TS=$(date +%s)
+            if [[ "${CUR_TS}" -gt "${WARN_TS}" ]]; then
+              echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
+            elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
+              echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting 20 minutes.
+              HAS_LOGGED_INFO=1
+            fi
+          }
+          while [[ ! -f "${TLS_PK}" || !
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9105 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29105/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9105 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-node-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + # ovn-northd: convert network objects in nbdb to flows in sbdb + - name: northd + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping ovn-northd" + OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + echo "$(date -Iseconds) - ovn-northd stopped" + rm -f /var/run/ovn/ovn-northd.pid + exit 0 + } + # end of quit + trap quit TERM INT + + echo "$(date -Iseconds) - starting ovn-northd" + exec ovn-northd \ + --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + --pidfile /var/run/ovn/ovn-northd.pid & + + wait $! 
+ lifecycle: + preStop: + exec: + command: + - OVN_MANAGE_OVSDB=no + - /usr/share/ovn/scripts/ovn-ctl + - stop_northd + env: + - name: OVN_LOG_LEVEL + value: info + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert # not needed, but useful when exec'ing in to pod. + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + # nbdb: the northbound, or logical network object DB. In standalone mode listening on unix socket. + - name: nbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping nbdb" + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb + echo "$(date -Iseconds) - nbdb stopped" + rm -f /var/run/ovn/ovnnb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + + # checks if a db pod is part of a current cluster + + OVN_ARGS="--no-monitor" + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_nb_ovsdb & + + wait $! + + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + rm -f /var/run/ovn/ovnnb_db.pid + #configure northd_probe_interval + OVN_NB_CTL="ovn-nbctl" + + echo "Setting the IC zone to ${K8S_NODE}" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 10 ]]; do + current_probe_interval=$(${OVN_NB_CTL} set NB_Global . name="${K8S_NODE}" options:name="${K8S_NODE}") + if [[ $? 
== 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-5000} + echo "Setting northd probe interval to ${northd_probe_interval} ms" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 10 ]]; do + current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval) + if [[ $? == 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then + retries=0 + while [[ "${retries}" -lt 10 ]]; do + ${OVN_NB_CTL} set NB_GLOBAL . options:northd_probe_interval=${northd_probe_interval} + if [[ $? != 0 ]]; then + echo "Failed to set northd probe interval to ${northd_probe_interval}. retrying....." + sleep 2 + (( retries += 1 )) + else + echo "Successfully set northd probe interval to ${northd_probe_interval} ms" + break + fi + done + fi + + preStop: + exec: + command: + - /usr/bin/ovn-appctl + - -t + - /var/run/ovn/ovnnb_db.ctl + - exit + readinessProbe: +{{ if not .IsSNO }} + initialDelaySeconds: 90 +{{ end }} + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=3 ovsdb-server/sync-status 2>/dev/null | { grep "state: active" || false; }) + if [[ -z "${status}" ]]; then + echo "NB DB is not running or active." 
+ exit 1 + fi + + env: + - name: OVN_LOG_LEVEL + value: info + - name: OVN_NORTHD_PROBE_INTERVAL + value: "{{.OVN_NORTHD_PROBE_INTERVAL}}" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + # sbdb: the southbound, or logical network object DB. In standalone mode listening on unix socket + - name: sbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + + OVN_ARGS="--no-monitor" + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_sb_ovsdb & + + wait $! 
+
+        lifecycle:
+          postStart:
+            exec:
+              command:
+              - /bin/bash
+              - -c
+              - |
+                set -x
+                rm -f /var/run/ovn/ovnsb_db.pid
+
+          preStop:
+            exec:
+              command:
+              - /usr/bin/ovn-appctl
+              - -t
+              - /var/run/ovn/ovnsb_db.ctl
+              - exit
+        readinessProbe:
+{{ if not .IsSNO }}
+          initialDelaySeconds: 90
+{{ end }}
+          timeoutSeconds: 5
+          exec:
+            command:
+            - /bin/bash
+            - -c
+            - |
+              set -xeo pipefail
+              status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=3 ovsdb-server/sync-status 2>/dev/null | { grep "state: active" || false; })
+              if [[ -z "${status}" ]]; then
+                echo "SB DB is not running or active."
+                exit 1
+              fi
+
+        env:
+        - name: OVN_LOG_LEVEL
+          value: info
+        volumeMounts:
+        - mountPath: /etc/openvswitch/
+          name: etc-openvswitch
+        - mountPath: /etc/ovn/
+          name: etc-openvswitch
+        - mountPath: /var/lib/openvswitch/
+          name: var-lib-openvswitch
+        - mountPath: /run/openvswitch/
+          name: run-openvswitch
+        - mountPath: /run/ovn/
+          name: run-ovn
+        - mountPath: /env
+          name: env-overrides
+        - mountPath: /ovn-cert
+          name: ovn-cert
+        - mountPath: /ovn-ca
+          name: ovn-ca
+        resources:
+          requests:
+            cpu: 10m
+            memory: 300Mi
+        terminationMessagePolicy: FallbackToLogsOnError
+
+      - name: kube-rbac-proxy-ncm
+        image: {{.KubeRBACProxyImage}}
+        command:
+        - /bin/bash
+        - -c
+        - |
+          #!/bin/bash
+          set -euo pipefail
+          TLS_PK=/etc/pki/tls/metrics-cert/tls.key
+          TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
+          # As the secret mount is optional we must wait for the files to be present.
+          # The service is created in monitor.yaml and this is created in sdn.yaml.
+          TS=$(date +%s)
+          WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
+          HAS_LOGGED_INFO=0
+
+          log_missing_certs(){
+            CUR_TS=$(date +%s)
+            if [[ "${CUR_TS}" -gt "${WARN_TS}" ]]; then
+              echo $(date -Iseconds) WARN: ovn-master-metrics-cert not mounted after 20 minutes.
+            elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
+              echo $(date -Iseconds) INFO: ovn-master-metrics-cert not mounted. Waiting 20 minutes.
+              HAS_LOGGED_INFO=1
+            fi
+          }
+          while [[ ! -f "${TLS_PK}" || !
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-master-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9113 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29113/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9113 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-master-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + + # ovnkube-controller in multizone mode: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-controller + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_local" ]]; then + set -o allexport + source "/env/_local" + set +o allexport + fi + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
+ exit 1 + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + multi_network_policy_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then + multi_network_policy_enabled_flag="--enable-multi-networkpolicy" + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-controller - start ovnkube init-ovnkube-controller ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-ovnkube-controller "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --ovn-empty-lb-events \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --metrics-bind-address "127.0.0.1:29113" \ + --metrics-enable-pprof \ + --metrics-enable-config-duration \ + ${gateway_mode_flags} \ + --disable-snat-multiple-gws \ + ${multi_network_enabled_flag} \ + ${multi_network_policy_enabled_flag} \ + --enable-multicast \ + --zone ${K8S_NODE} \ + --enable-interconnect \ + --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - name: metrics-port + containerPort: 29113 + terminationMessagePolicy: FallbackToLogsOnError + + # ovnkube-node: does node-level bookkeeping and configuration + - name: ovnkube-node + image: "{{.OvnImage}}" + command: + 
- /bin/bash + - -c + - | + set -xe + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/ + ovn_config_namespace=openshift-ovn-kubernetes + echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" + iptables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK + iptables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + ip6tables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK + ip6tables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node" + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
+ exit 1 + fi + + export_network_flows_flags= + if [[ -n "${NETFLOW_COLLECTORS}" ]] ; then + export_network_flows_flags="--netflow-targets ${NETFLOW_COLLECTORS}" + fi + if [[ -n "${SFLOW_COLLECTORS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --sflow-targets ${SFLOW_COLLECTORS}" + fi + if [[ -n "${IPFIX_COLLECTORS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-targets ${IPFIX_COLLECTORS}" + fi + if [[ -n "${IPFIX_CACHE_MAX_FLOWS}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-cache-max-flows ${IPFIX_CACHE_MAX_FLOWS}" + fi + if [[ -n "${IPFIX_CACHE_ACTIVE_TIMEOUT}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-cache-active-timeout ${IPFIX_CACHE_ACTIVE_TIMEOUT}" + fi + if [[ -n "${IPFIX_SAMPLING}" ]] ; then + export_network_flows_flags="$export_network_flows_flags --ipfix-sampling ${IPFIX_SAMPLING}" + fi + gw_interface_flag= + # if br-ex1 is configured on the node, we want to use it for external gateway traffic + if [ -d /sys/class/net/br-ex1 ]; then + gw_interface_flag="--exgw-interface=br-ex1" + fi + + node_mgmt_port_netdev_flags= + if [[ -n "${OVNKUBE_NODE_MGMT_PORT_NETDEV}" ]] ; then + node_mgmt_port_netdev_flags="--ovnkube-node-mgmt-port-netdev ${OVNKUBE_NODE_MGMT_PORT_NETDEV}" + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + multi_network_policy_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then + multi_network_policy_enabled_flag="--enable-multi-networkpolicy" + fi + + exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \ + ${gateway_mode_flags} \ + ${node_mgmt_port_netdev_flags} \ + {{- if eq .OVN_NODE_MODE "dpu-host" }} + --ovnkube-node-mode dpu-host \ + {{- end }} 
+ --metrics-bind-address "127.0.0.1:29103" \ + --ovn-metrics-bind-address "127.0.0.1:29105" \ + --metrics-enable-pprof \ + --export-ovs-metrics \ + --disable-snat-multiple-gws \ + ${export_network_flows_flags} \ + ${multi_network_enabled_flag} \ + ${multi_network_policy_enabled_flag} \ + --zone ${K8S_NODE} \ + --enable-interconnect \ + ${gw_interface_flag} + env: + # for kubectl + - name: KUBERNETES_SERVICE_PORT + value: "{{.KUBERNETES_SERVICE_PORT}}" + - name: KUBERNETES_SERVICE_HOST + value: "{{.KUBERNETES_SERVICE_HOST}}" + - name: OVN_CONTROLLER_INACTIVITY_PROBE + value: "{{.OVN_CONTROLLER_INACTIVITY_PROBE}}" + - name: OVN_KUBE_LOG_LEVEL + value: "4" + {{ if .NetFlowCollectors }} + - name: NETFLOW_COLLECTORS + value: "{{.NetFlowCollectors}}" + {{ end }} + {{ if .SFlowCollectors }} + - name: SFLOW_COLLECTORS + value: "{{.SFlowCollectors}}" + {{ end }} + {{ if .IPFIXCollectors }} + - name: IPFIX_COLLECTORS + value: "{{.IPFIXCollectors}}" + {{ end }} + {{ if .IPFIXCacheMaxFlows }} + - name: IPFIX_CACHE_MAX_FLOWS + value: "{{.IPFIXCacheMaxFlows}}" + {{ end }} + {{ if .IPFIXCacheActiveTimeout }} + - name: IPFIX_CACHE_ACTIVE_TIMEOUT + value: "{{.IPFIXCacheActiveTimeout}}" + {{ end }} + {{ if .IPFIXSampling }} + - name: IPFIX_SAMPLING + value: "{{.IPFIXSampling}}" + {{ end }} + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + ports: + - name: metrics-port + containerPort: 29103 + securityContext: + privileged: true + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + # for the CNI server socket + - mountPath: /run/ovn-kubernetes/ + name: host-run-ovn-kubernetes + # accessing bind-mounted net namespaces + - 
mountPath: /run/netns + name: host-run-netns + readOnly: true + mountPropagation: HostToContainer + # for installing the CNI plugin binary + - mountPath: /cni-bin-dir + name: host-cni-bin + # for installing the CNI configuration file + - mountPath: /etc/cni/net.d + name: host-cni-netd + # Where we store IP allocations + - mountPath: /var/lib/cni/networks/ovn-k8s-cni-overlay + name: host-var-lib-cni-networks-ovn-kubernetes + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + lifecycle: + preStop: + exec: + command: ["rm","-f","/etc/cni/net.d/10-ovn-kubernetes.conf"] + readinessProbe: + exec: + command: ["test", "-f", "/etc/cni/net.d/10-ovn-kubernetes.conf"] + initialDelaySeconds: 5 + periodSeconds: 30 + {{- if .OVNPlatformAzure}} + - name: drop-icmp + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + + touch /var/run/ovn/add_iptables.sh + chmod 0755 /var/run/ovn/add_iptables.sh + cat <<'EOF' > /var/run/ovn/add_iptables.sh + #!/bin/sh + if [ -z "$3" ] + then + echo "Called with host address missing, ignore" + exit 0 + fi + echo "Adding ICMP drop rule for '$3' " + if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION + then + echo "iptables already set for $3" + else + iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION + fi + EOF + + echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" + iptables -X CHECK_ICMP_SOURCE || true + iptables -N CHECK_ICMP_SOURCE || true + iptables -F CHECK_ICMP_SOURCE + iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j 
CHECK_ICMP_SOURCE || true + iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE + iptables -N ICMP_ACTION || true + iptables -F ICMP_ACTION + iptables -A ICMP_ACTION -j LOG + iptables -A ICMP_ACTION -j DROP + # + ip addr show + ip route show + iptables -nvL + iptables -nvL -t nat + oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + lifecycle: + preStop: + exec: + command: ["/bin/bash", "-c", "echo drop-icmp done"] + securityContext: + privileged: true + volumeMounts: + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + - mountPath: /run/ovn/ + name: run-ovn + resources: + requests: + cpu: 5m + memory: 20Mi + env: + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + {{- end}} + nodeSelector: + beta.kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + # used for iptables wrapper scripts + - name: host-slash + hostPath: + path: / + - name: host-run-netns + hostPath: + path: /run/netns + - name: var-lib-openvswitch + hostPath: + path: /var/lib/openvswitch/data + - name: etc-openvswitch + hostPath: + path: /var/lib/ovn-ic/etc # different path than 4.13 and single-zone 4.14 + - name: run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn-ic # different path than 4.13 and single-zone 4.14 + {{ if eq .OVN_NODE_MODE "full" }} + # Used for placement of ACL audit logs + - name: node-log + hostPath: + path: /var/log/ovn-ic # different path than 4.13 and single-zone 4.14 + - name: log-socket + hostPath: + path: /dev/log + {{ end }} + # For CNI server + - name: 
host-run-ovn-kubernetes + hostPath: + path: /run/ovn-kubernetes + - name: host-cni-bin + hostPath: + path: "{{.CNIBinDir}}" + - name: host-cni-netd + hostPath: + path: "{{.CNIConfDir}}" + - name: host-var-lib-cni-networks-ovn-kubernetes + hostPath: + path: /var/lib/cni/networks/ovn-k8s-cni-overlay + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-node-metrics-cert + secret: + secretName: ovn-node-metrics-cert + optional: true + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + optional: true + tolerations: + - operator: "Exists" diff --git a/bindata/network/ovn-kubernetes/self-hosted/005-service.yaml b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/005-service.yaml similarity index 100% rename from bindata/network/ovn-kubernetes/self-hosted/005-service.yaml rename to bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/005-service.yaml diff --git a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-master.yaml b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml similarity index 98% rename from bindata/network/ovn-kubernetes/self-hosted/ovnkube-master.yaml rename to bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml index 106301052a..076781f1d3 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-master.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-master.yaml @@ -306,6 +306,22 @@ spec: exit 0 fi + OVN_NB_CTL="ovn-nbctl" + echo "Setting the IC zone to global" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 10 ]]; do + current_probe_interval=$(${OVN_NB_CTL} set NB_Global . name="global" options:name="global") + if [[ $? 
== 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + # retry an operation a number of times, sleeping 2 seconds between each try retry() { local tries=${1} @@ -840,6 +856,8 @@ spec: echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-master - start ovnkube --init-master ${K8S_NODE}" exec /usr/bin/ovnkube \ + --enable-interconnect \ + --zone global \ --init-master "${K8S_NODE}" \ --config-file=/run/ovnkube-config/ovnkube.conf \ --ovn-empty-lb-events \ @@ -1018,4 +1036,3 @@ spec: - key: "node.kubernetes.io/unreachable" operator: "Exists" - key: "node.kubernetes.io/network-unavailable" - operator: "Exists" diff --git a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-node.yaml similarity index 99% rename from bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml rename to bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-node.yaml index 3bd2460d2e..f8d85c11f2 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/single-zone-interconnect/ovnkube-node.yaml @@ -374,6 +374,8 @@ spec: fi exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \ + --enable-interconnect \ + --zone global \ --nb-address "{{.OVN_NB_DB_LIST}}" \ --sb-address "{{.OVN_SB_DB_LIST}}" \ --nb-client-privkey /ovn-cert/tls.key \ diff --git a/pkg/bootstrap/types.go b/pkg/bootstrap/types.go index d07533ba5a..a8cceff7e1 100644 --- a/pkg/bootstrap/types.go +++ b/pkg/bootstrap/types.go @@ -54,12 +54,13 @@ type OVNConfigBoostrapResult struct { // OVNUpdateStatus contains the status of existing daemonset // or statefulset that are maily used by upgrade process type OVNUpdateStatus struct { - Kind string - Namespace string - Name string - Version string - IPFamilyMode string - Progressing bool + Kind string + Namespace string + Name string + 
Version string + IPFamilyMode string + Progressing bool + InterConnectZoneMode string } type OVNBootstrapResult struct { diff --git a/pkg/controller/statusmanager/pod_status.go b/pkg/controller/statusmanager/pod_status.go index c0717e52bb..e12525137e 100644 --- a/pkg/controller/statusmanager/pod_status.go +++ b/pkg/controller/statusmanager/pod_status.go @@ -13,11 +13,14 @@ import ( configv1 "github.com/openshift/api/config/v1" operv1 "github.com/openshift/api/operator/v1" "github.com/openshift/cluster-network-operator/pkg/names" + "github.com/openshift/cluster-network-operator/pkg/util" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" crclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -102,7 +105,7 @@ func (status *StatusManager) SetFromPods() { if !isNonCritical(ds) { hung = append(hung, status.CheckCrashLoopBackOffPods(dsName, ds.Spec.Selector.MatchLabels, "DaemonSet")...) } - } else if ds.Status.NumberAvailable == 0 { // NOTE: update this if we ever expect empty (unscheduled) daemonsets ~cdc + } else if ds.Status.NumberAvailable == 0 && ds.Status.DesiredNumberScheduled > 0 { progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not yet scheduled on any nodes", dsName.String())) dsProgressing = true } else if ds.Generation > ds.Status.ObservedGeneration { @@ -110,8 +113,29 @@ func (status *StatusManager) SetFromPods() { dsProgressing = true } - if ds.Annotations["release.openshift.io/version"] != targetLevel { + // hack for 2-phase upgrade from non-IC to IC versions: + // don't update the version field until phase 2 is over + twoPhaseUpgradeIsOngoing := false + icConfigMap, err := util.GetInterConnectConfigMap(status.client.ClientFor("").Kubernetes()) + if err == nil { + // while the upgrade is ongoing, the IC configmap exists and exhibits a fromVersion value. 
+ // When phase 2 is over, the configmap is deleted. + if fromVersion, ok := icConfigMap.Data["from-version"]; ok && fromVersion != "" { + twoPhaseUpgradeIsOngoing = true + klog.Infof("riccardo: two-phase upgrade is ongoing") + } + } else { + if errors.IsNotFound(err) { + klog.Infof("riccardo: IC configmap not found, ") // TODO remove this + } else { + klog.Errorf("riccardo: could not fetch IC configmap") // TODO remove this + + } + } + + if ds.Annotations["release.openshift.io/version"] != targetLevel || twoPhaseUpgradeIsOngoing { reachedAvailableLevel = false + klog.Infof("riccardo: two-phase upgrade is ongoing: reachedAvailableLevel=false") } var dsHung *string diff --git a/pkg/network/ovn_kubernetes.go b/pkg/network/ovn_kubernetes.go index b8831cd86e..9227766cbc 100644 --- a/pkg/network/ovn_kubernetes.go +++ b/pkg/network/ovn_kubernetes.go @@ -2,6 +2,7 @@ package network import ( "context" + "encoding/json" "fmt" "log" "math" @@ -26,6 +27,7 @@ import ( "github.com/openshift/cluster-network-operator/pkg/names" "github.com/openshift/cluster-network-operator/pkg/platform" "github.com/openshift/cluster-network-operator/pkg/render" + "github.com/openshift/cluster-network-operator/pkg/util" iputil "github.com/openshift/cluster-network-operator/pkg/util/ip" "github.com/openshift/cluster-network-operator/pkg/util/k8s" hyperv1 "github.com/openshift/hypershift/api/v1beta1" @@ -37,8 +39,10 @@ import ( uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" - types "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/retry" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" crclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -84,7 +88,7 @@ const ( // - the ovnkube-node daemonset // - the ovnkube-master deployment // and some other small things. 
-func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.BootstrapResult, manifestDir string) ([]*uns.Unstructured, bool, error) { +func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.BootstrapResult, manifestDir string, client cnoclient.Client) ([]*uns.Unstructured, bool, error) { var progressing bool // TODO: Fix operator behavior when running in a cluster with an externalized control plane. @@ -100,6 +104,17 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo apiServer := bootstrapResult.Infra.APIServers[bootstrap.APIServerDefault] localAPIServer := bootstrapResult.Infra.APIServers[bootstrap.APIServerDefaultLocal] + targetZoneMode, err := getTargetInterConnectZoneMode(client) + if err != nil || targetZoneMode.zoneMode == zoneModeUndefined { + return nil, progressing, errors.Wrap( + err, "failed to render manifests, could not determine interconnect zone") + } + + progressing, err = prepareUpgradeToInterConnect(bootstrapResult.OVN, client, &targetZoneMode, bootstrapResult.OVN.OVNKubernetesConfig.HyperShiftConfig.Enabled) + if err != nil { + return nil, progressing, fmt.Errorf("failed to render manifests: %w", err) + } + // render the manifests on disk data := render.MakeRenderData() data.Data["ReleaseVersion"] = os.Getenv("RELEASE_VERSION") @@ -376,11 +391,23 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo var manifestSubDir string manifestDirs := make([]string, 0, 2) manifestDirs = append(manifestDirs, filepath.Join(manifestDir, "network/ovn-kubernetes/common")) + if bootstrapResult.OVN.OVNKubernetesConfig.HyperShiftConfig.Enabled { - manifestSubDir = "network/ovn-kubernetes/managed" + //manifestSubDir = "network/ovn-kubernetes/managed" + manifestDirs = append(manifestDirs, filepath.Join(manifestDir, "network/ovn-kubernetes/managed/common")) + //manifestDirs = append(manifestDirs, filepath.Join(manifestDir, manifestSubDir)) + manifestSubDir = 
"network/ovn-kubernetes/managed/multi-zone-interconnect" // default is multizone + if targetZoneMode.zoneMode == zoneModeSingleZone { // non-default, internal use only + manifestSubDir = "network/ovn-kubernetes/managed/single-zone-interconnect" + } manifestDirs = append(manifestDirs, filepath.Join(manifestDir, manifestSubDir)) + } else { - manifestSubDir = "network/ovn-kubernetes/self-hosted" + manifestDirs = append(manifestDirs, filepath.Join(manifestDir, "network/ovn-kubernetes/self-hosted/common")) + manifestSubDir = "network/ovn-kubernetes/self-hosted/multi-zone-interconnect" // default is multizone + if targetZoneMode.zoneMode == zoneModeSingleZone { // non-default, internal use only + manifestSubDir = "network/ovn-kubernetes/self-hosted/single-zone-interconnect" + } manifestDirs = append(manifestDirs, filepath.Join(manifestDir, manifestSubDir)) } @@ -459,7 +486,12 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo return nil, progressing, errors.Wrapf(err, "failed to set %s annotation on daemonsets or statefulsets", clusterNetworkCIDRs) } - // don't process upgrades if we are handling a dual-stack conversion. + // don't process interconnect zone mode change if we are handling a dual-stack conversion. + if updateMaster && updateNode { + updateNode, updateMaster = shouldUpdateOVNKonInterConnectZoneModeChange(bootstrapResult.OVN, targetZoneMode.zoneMode) + } + + // don't process upgrades if we are handling an interconnect zone mode change if updateMaster && updateNode { updateNode, updateMaster = shouldUpdateOVNKonUpgrade(bootstrapResult.OVN, os.Getenv("RELEASE_VERSION")) } @@ -499,11 +531,11 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo if !renderPrePull { // remove prepull from the list of objects to render. 
- objs = k8s.RemoveObjByGroupKindName(objs, "apps", "DaemonSet", "openshift-ovn-kubernetes", "ovnkube-upgrades-prepuller") + objs = k8s.RemoveObjByGroupKindName(objs, "apps", "DaemonSet", util.OVN_NAMESPACE, "ovnkube-upgrades-prepuller") } if bootstrapResult.OVN.OVNKubernetesConfig.HyperShiftConfig.Enabled && bootstrapResult.OVN.OVNKubernetesConfig.HyperShiftConfig.OVNSbDbRouteHost == "" { - k8s.UpdateObjByGroupKindName(objs, "apps", "DaemonSet", "openshift-ovn-kubernetes", "ovnkube-node", func(o *uns.Unstructured) { + k8s.UpdateObjByGroupKindName(objs, "apps", "DaemonSet", util.OVN_NAMESPACE, "ovnkube-node", func(o *uns.Unstructured) { anno := o.GetAnnotations() if anno == nil { anno = map[string]string{} @@ -1091,6 +1123,7 @@ func (l nodeInfoList) Less(i, j int) bool { // getMasterAddresses determines the addresses (IP or DNS names) of the ovn-kubernetes // control plane nodes. It returns the list of addresses and an updated timeout, // or an error. +// TODO revisit this if necessary func getMasterAddresses(kubeClient crclient.Client, controlPlaneReplicaCount int, hypershift bool, timeout int) ([]string, int, error) { var heartBeat int masterNodeList := &corev1.NodeList{} @@ -1167,6 +1200,75 @@ func getMasterAddresses(kubeClient crclient.Client, controlPlaneReplicaCount int return ovnMasterAddresses, timeout, nil } +// TODO move this to the top of the file +type InterConnectZoneMode string + +const ( + zoneModeMultiZone InterConnectZoneMode = "multizone" // every node is assigned a different zone + zoneModeSingleZone InterConnectZoneMode = "singlezone" // all nodes are assigned to one zone + zoneModeUndefined InterConnectZoneMode = "undefined" // the cluster is neither multizone nor onezone +) + +type targetZoneModeType struct { + // zoneMode indicates the target zone mode that CNO is supposed to converge to. 
+ zoneMode InterConnectZoneMode + // "temporary", if true, marks that the target zone mode is only temporary; + // allowing us to switch first to single-zone mode during upgrades to versions with IC support (>= 4.14) + temporary bool + // "configMapFound" indicates whether the interconnect configmap was found; when not found, + // the zone mode defaults to multizone. + configMapFound bool + // "fromVersion" is used during upgrades from versions with no IC support (<= 4.13) to keep track of + // which version the cluster is upgrading from; CNO will show fromVersion in the version field in its operator status + // all throughout the 2-phase upgrade. + fromVersion string +} + +// getTargetInterConnectZoneMode determines the desired interconnect zone mode for the cluster. +// Available modes are two: multizone (default, one node per zone) and single zone (all nodes in the same zone). +// A configmap is looked up in order to switch to non-default single zone. In absence of this configmap, multizone is applied. 
+func getTargetInterConnectZoneMode(kubeClient cnoclient.Client) (targetZoneModeType, error) { + targetZoneMode := targetZoneModeType{} + + interConnectConfigMap, err := util.GetInterConnectConfigMap(kubeClient.ClientFor("").Kubernetes()) + if err != nil { + if apierrors.IsNotFound(err) { + klog.Infof("riccardo: No OVN InterConnect configMap found, applying default: multizone") + targetZoneMode.zoneMode = zoneModeMultiZone + return targetZoneMode, nil + } + return targetZoneMode, fmt.Errorf("riccardo: Unable to bootstrap OVN, unable to retrieve interconnect configMap: %v", err) + } + targetZoneMode.configMapFound = true + if zoneModeFromConfigMap, ok := interConnectConfigMap.Data["zone-mode"]; ok { + switch strings.ToLower(zoneModeFromConfigMap) { + case string(zoneModeSingleZone): + targetZoneMode.zoneMode = zoneModeSingleZone + case string(zoneModeMultiZone): + targetZoneMode.zoneMode = zoneModeMultiZone + default: + klog.Infof("[getTargetInterConnectZoneMode] riccardo: zoneModeFromConfigMap=%s, defaulting to multizone", + zoneModeFromConfigMap) + targetZoneMode.zoneMode = zoneModeMultiZone // default + } + } else { + klog.Infof("[getTargetInterConnectZoneMode] riccardo: no valid value in configMap, defaulting to multizone") + targetZoneMode.zoneMode = zoneModeMultiZone // default + } + + if temporaryFromConfigMap, ok := interConnectConfigMap.Data["temporary"]; ok { + targetZoneMode.temporary = strings.ToLower(temporaryFromConfigMap) == "true" + } + + if fromVersion, ok := interConnectConfigMap.Data["from-version"]; ok { + targetZoneMode.fromVersion = fromVersion + } + + klog.Infof("[getTargetInterConnectZoneMode] riccardo zone from configmap: %+v", targetZoneMode) + + return targetZoneMode, nil +} + func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus *bootstrap.InfraStatus) (*bootstrap.OVNBootstrapResult, error) { clusterConfig := &corev1.ConfigMap{} clusterConfigLookup := types.NamespacedName{Name: CLUSTER_CONFIG_NAME, Namespace: 
CLUSTER_CONFIG_NAMESPACE} @@ -1203,6 +1305,7 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus // any OVN cluster which is bootstrapped here, to the same initiator (should it still exists), hence we annotate the // network.operator.openshift.io CRD with this information and always try to re-use the same member for the OVN RAFT // cluster initialization + // TODO this is only needed in single-zone mode var clusterInitiator string currentAnnotation := conf.GetAnnotations() if cInitiator, ok := currentAnnotation[names.OVNRaftClusterInitiator]; ok && currentInitiatorExists(ovnMasterAddresses, cInitiator) { @@ -1220,6 +1323,8 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus } // Retrieve existing daemonsets or statefulsets status - used for deciding if upgrades should happen + // TODO in hypershift mode add the zone-mode in which master and node are + // (when switching to one mode to another, one DS can be in one mode and the other DS in the other mode) var nsn types.NamespacedName masterStatus := &bootstrap.OVNUpdateStatus{} nodeStatus := &bootstrap.OVNUpdateStatus{} @@ -1247,6 +1352,10 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus masterStatus.IPFamilyMode = masterSS.GetAnnotations()[names.NetworkIPFamilyModeAnnotation] masterStatus.Version = masterSS.GetAnnotations()["release.openshift.io/version"] masterStatus.Progressing = statefulSetProgressing(masterSS) + masterStatus.InterConnectZoneMode = string(getInterConnectZoneModeForMasterStatefulSet(masterSS)) + + klog.Infof("KEYWORD: jtan: master SS zone-mode=%s, progressing=%t", masterStatus.InterConnectZoneMode, masterStatus.Progressing) + } } else { masterDS := &appsv1.DaemonSet{ @@ -1255,21 +1364,62 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus APIVersion: appsv1.SchemeGroupVersion.String(), }, } - nsn = types.NamespacedName{Namespace: "openshift-ovn-kubernetes", Name: 
"ovnkube-master"} - if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, masterDS); err != nil { - if !apierrors.IsNotFound(err) { - return nil, fmt.Errorf("Failed to retrieve existing master DaemonSet: %w", err) + // TODO1 decide whether we should have the name ovnkube-master for both zone modes... it'd be less prone to errors + // The following commented code retrieves ovkube-control-plane DS first and, if it's missing, ovnkube-master + // nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovnkube-control-plane"} // for multizone IC + // var errMaster error + // masterZoneMode := zoneModeUndefined + // if errMaster = kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, masterDS); errMaster != nil { + // if !apierrors.IsNotFound(errMaster) { + // return nil, fmt.Errorf("Failed to retrieve existing ovnkube-control-plane DaemonSet: %w", errMaster) + // } else { + // // if there's no ovnkube-control-plane, see if we're in single-zone mode + // nsnSingleZone := types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovnkube-master"} // for single-zone IC + // if errMaster = kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsnSingleZone, masterDS); errMaster != nil { + // if !apierrors.IsNotFound(errMaster) { + // return nil, fmt.Errorf("Failed to retrieve existing single-zone master DaemonSet: %w", errMaster) + // } else { + // masterStatus = nil + // } + // } else { + // masterZoneMode = zoneModeSingleZone + + // } + // } + // } else { + // masterZoneMode = zoneModeMultiZone + // } + + // if errMaster == nil { + // masterStatus.Kind = "DaemonSet" + // masterStatus.Namespace = masterDS.Namespace + // masterStatus.Name = masterDS.Name + // masterStatus.IPFamilyMode = masterDS.GetAnnotations()[names.NetworkIPFamilyModeAnnotation] + // masterStatus.Version = masterDS.GetAnnotations()["release.openshift.io/version"] // current version for master DS + // masterStatus.Progressing = daemonSetProgressing(masterDS, false) + // 
masterStatus.InterConnectZoneMode = string(masterZoneMode) + // } + nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovnkube-master"} // for multizone IC + var errMaster error + if errMaster = kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, masterDS); errMaster != nil { + if !apierrors.IsNotFound(errMaster) { + return nil, fmt.Errorf("Failed to retrieve existing ovnkube-control-plane DaemonSet: %w", errMaster) } else { masterStatus = nil + klog.Infof("riccardo: master DS not found") } } else { masterStatus.Kind = "DaemonSet" masterStatus.Namespace = masterDS.Namespace masterStatus.Name = masterDS.Name masterStatus.IPFamilyMode = masterDS.GetAnnotations()[names.NetworkIPFamilyModeAnnotation] - masterStatus.Version = masterDS.GetAnnotations()["release.openshift.io/version"] + masterStatus.Version = masterDS.GetAnnotations()["release.openshift.io/version"] // current version for master DS masterStatus.Progressing = daemonSetProgressing(masterDS, false) + masterStatus.InterConnectZoneMode = string(getInterConnectZoneModeForMasterDaemonSet(masterDS)) + + klog.Infof("riccardo: master DS zone-mode=%s, progressing=%t", masterStatus.InterConnectZoneMode, masterStatus.Progressing) } + } nodeDS := &appsv1.DaemonSet{ @@ -1278,20 +1428,25 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus APIVersion: appsv1.SchemeGroupVersion.String(), }, } - nsn = types.NamespacedName{Namespace: "openshift-ovn-kubernetes", Name: "ovnkube-node"} + nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovnkube-node"} if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, nodeDS); err != nil { if !apierrors.IsNotFound(err) { return nil, fmt.Errorf("Failed to retrieve existing node DaemonSet: %w", err) } else { nodeStatus = nil + klog.Infof("riccardo: node DS not found") } } else { nodeStatus.Kind = "DaemonSet" nodeStatus.Namespace = nodeDS.Namespace nodeStatus.Name = nodeDS.Name nodeStatus.IPFamilyMode = 
nodeDS.GetAnnotations()[names.NetworkIPFamilyModeAnnotation] - nodeStatus.Version = nodeDS.GetAnnotations()["release.openshift.io/version"] + nodeStatus.Version = nodeDS.GetAnnotations()["release.openshift.io/version"] // current version for node DS nodeStatus.Progressing = daemonSetProgressing(nodeDS, true) + nodeStatus.InterConnectZoneMode = string(getInterConnectZoneModeForNodeDaemonSet(nodeDS)) + + klog.Infof("riccardo: node DS zone-mode=%s, progressing=%t", nodeStatus.InterConnectZoneMode, nodeStatus.Progressing) + } prePullerDS := &appsv1.DaemonSet{ @@ -1300,7 +1455,7 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus APIVersion: appsv1.SchemeGroupVersion.String(), }, } - nsn = types.NamespacedName{Namespace: "openshift-ovn-kubernetes", Name: "ovnkube-upgrades-prepuller"} + nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovnkube-upgrades-prepuller"} if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, prePullerDS); err != nil { if !apierrors.IsNotFound(err) { return nil, fmt.Errorf("Failed to retrieve existing prepuller DaemonSet: %w", err) @@ -1321,7 +1476,7 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus APIVersion: appsv1.SchemeGroupVersion.String(), }, } - nsn = types.NamespacedName{Namespace: "openshift-ovn-kubernetes", Name: "ovn-ipsec"} + nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovn-ipsec"} if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, ipsecDS); err != nil { if !apierrors.IsNotFound(err) { return nil, fmt.Errorf("Failed to retrieve existing ipsec DaemonSet: %w", err) @@ -1608,6 +1763,118 @@ func shouldUpdateOVNKonUpgrade(ovn bootstrap.OVNBootstrapResult, releaseVersion return true, true } +// shouldUpdateOVNKonInterConnectZoneModeChange determines if we should roll out changes to +// the master and node daemonsets when the interconnect zone mode changes. 
+// When switching from multizone to single zone, we first roll out the new ovnk master DS +// and then the new ovnk node DS. For single zone to multizone, we do the opposite. +// When switching from single zone to multizone, as in upgrades from 4.13 to 4.14, +// we first roll out the new ovnk node DSand then the new ovnk master DS. +// For single zone to multizone (for internal use only), we do the opposite. +// This allows us to always have a working deployed ovnk while changing zone mode. +// To sum up: +// - single zone -> multizone: first roll out node, then master +// - multizone -> single zone: first roll out master, then node +func shouldUpdateOVNKonInterConnectZoneModeChange(ovn bootstrap.OVNBootstrapResult, targetZoneMode InterConnectZoneMode) (updateNode, updateMaster bool) { + // Fresh cluster - full steam ahead! + if ovn.NodeUpdateStatus == nil || ovn.MasterUpdateStatus == nil { + return true, true + } + + // if we're upgrading from a 4.13 cluster, which has no OVN InterConnect support, two phases are necessary. + // Phase 1: a) CNO pushes a configMap with zone-mode=singlezone, temporary=true; + // b) shouldUpdateOVNKonUpgrade rolls out first node DS then master DS in single-zone mode + // (there's no zone-mode change, since the 4.13 architecture is equivalent to single zone); + // Phase 2: a) Master and Node Daemonsets are now 4.14, so CNO removes the configMap; + // b) shouldUpdateOVNKonInterConnectZoneModeChange rolls out first node DS and then master DS, + // since without the configmap the desired zone mode is multizone and at the end of Phase 1 + // both DS's are in single zone. + // At the end, we have a 4.14 cluster in multizone mode. + + // When both DSs are in 4.13, we're in Phase 1 above, carried out by shouldUpdateOVNKonUpgrade. Nothing to do here. 
+ if isVersionLessThanOrEqualTo(ovn.NodeUpdateStatus.Version, 4, 13) || isVersionLessThanOrEqualTo(ovn.MasterUpdateStatus.Version, 4, 13) { + return true, true + } + + if targetZoneMode == zoneModeMultiZone { + // no zone change: roll out both node and master DSs. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) { + klog.Infof("riccardo: [targetZoneMode=multizone] Master and Node are already in multizone") + return true, true + } + + // first step of single zone -> multizone. Roll out node DS first. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) { + klog.Infof("riccardo: [targetZoneMode=multizone] Master and Node are both in single zone: update node first") + return true, false + } + + // second (and final) step of single zone -> multizone. Rollout master DS. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) { + if ovn.NodeUpdateStatus.Progressing { + klog.Infof("riccardo: [targetZoneMode=multizone] Wait for multizone node to roll out before rolling out multizone master") + return true, false + } + klog.Infof("riccardo: [targetZoneMode=multizone] Node is already multizone, update master now") + return true, true + } + + // unexpected state of single zone -> multizone. Node is still in single zone, + // master is already in multizone (the opposite should happen). Converge to multizone + // for node as well, but emit warning. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) { + klog.Warningf("riccardo: [targetZoneMode=multizone] unexpected state: node is single zone, master is multizone. 
Update node too.") + return true, true + } + + klog.Warningf("riccardo: [targetZoneMode=multizone] undefined zone mode for master and node") + return true, true + + } else if targetZoneMode == zoneModeSingleZone { + // no zone change: roll out both node and master DSs. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) { + klog.Infof("riccardo: [targetZoneMode=singlezone] Master and Node are already in singlezone") + return true, true + } + + // first step of multizone -> single zone. Roll out master DS first. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) { + klog.Infof("riccardo: [targetZoneMode=singlezone] Master and Node are both in multizone: update master first") + return false, true + } + + // second (and final) step of multi zone -> single zone. Rollout node DS. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) { + if ovn.MasterUpdateStatus.Progressing { + klog.V(2).Infof("riccardo: [targetZoneMode=singlezone] Wait for single-zone master to roll out before rolling out single-zone node") + return false, true + } + klog.Infof("riccardo: [targetZoneMode=singlezone] Master is already single zone, roll out node now") + return true, false + } + + // unexpected state of multi zone -> single zone. Node is already in single zone, + // master is still in multizone (the opposite should happen). Converge to single zone + // for master as well, but emit warning. + if ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) { + klog.Warningf("riccardo: [targetZoneMode=singlezone] unexpected state: node is single zone, master is multizone. 
Updating master too.") + return true, true + } + + klog.Warningf("riccardo: [targetZoneMode=singlezone] undefined zone mode for master and node") + return true, true + } + klog.Warningf("riccardo: undefined target zone mode") + return true, true +} + // daemonSetProgressing returns true if a daemonset is rolling out a change. // If allowHung is true, then treat a daemonset hung at 90% as "done" for our purposes. func daemonSetProgressing(ds *appsv1.DaemonSet, allowHung bool) bool { @@ -1737,3 +2004,192 @@ func isV6InternalSubnetLargeEnough(conf *operv1.NetworkSpec) bool { // reserve one IP for the gw, one IP for network and one for broadcasting return capacity.Cmp(maxNodesNum.Add(maxNodesNum, big.NewInt(3))) != -1 } + +func getInterConnectZoneModeForMasterStatefulSet(ss *appsv1.StatefulSet) InterConnectZoneMode { + for _, container := range ss.Spec.Template.Spec.Containers { + if container.Name == "ovnkube-control-plane" { + return zoneModeMultiZone + } + } + return zoneModeSingleZone +} + +// Determine the zone mode by looking for a known container name in multizone mode. +func getInterConnectZoneModeForDaemonSet(ds *appsv1.DaemonSet, knownContainerForMultiZone string) InterConnectZoneMode { + for _, container := range ds.Spec.Template.Spec.Containers { + if container.Name == knownContainerForMultiZone { + return zoneModeMultiZone + } + } + return zoneModeSingleZone +} + +func getInterConnectZoneModeForMasterDaemonSet(ds *appsv1.DaemonSet) InterConnectZoneMode { + return getInterConnectZoneModeForDaemonSet(ds, "ovnkube-control-plane") +} + +func getInterConnectZoneModeForNodeDaemonSet(ds *appsv1.DaemonSet) InterConnectZoneMode { + return getInterConnectZoneModeForDaemonSet(ds, "nbdb") +} + +// TODO: hacking the progressing field shouldn't be needed any more, since it's not taken into account +// by CVO. Try first with hacked version + progressing=True and then remove the hack on +// progressing field. 
+func prepareUpgradeToInterConnect(ovn bootstrap.OVNBootstrapResult, client cnoclient.Client, targetZoneMode *targetZoneModeType, hypershiftEnabled bool) (bool, error) { + // override progressing so that it's true all throughout the two upgrade phases handled below + progressing := (targetZoneMode.configMapFound && targetZoneMode.temporary || + (ovn.MasterUpdateStatus != nil && ovn.MasterUpdateStatus.Progressing) || + (ovn.NodeUpdateStatus != nil && ovn.NodeUpdateStatus.Progressing)) + + // TODO: FIX THIS HACK.. in phase 2 of the updated in hypershift have to manually delete the master stateful set and routes + if hypershiftEnabled && ovn.MasterUpdateStatus != nil && ovn.NodeUpdateStatus != nil && + ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + !ovn.NodeUpdateStatus.Progressing && !ovn.MasterUpdateStatus.Progressing && + targetZoneMode.zoneMode == zoneModeMultiZone { + + if err := client.ClientFor(names.ManagementClusterName).Kubernetes().AppsV1().StatefulSets(ovn.MasterUpdateStatus.Namespace).Delete( + context.TODO(), "ovnkube-master", metav1.DeleteOptions{}); err != nil { + klog.Errorf("KEYWORD: jtan - tried to delete the stateful set: %v", err) + } + /* + if err := client.Default().Kubernetes().CoreV1().ConfigMaps(util.OVN_NAMESPACE).Delete( + context.TODO(), util.OVN_INTERCONNECT_CONFIGMAP_NAME, metav1.DeleteOptions{}); err != nil { + if apierrors.IsNotFound(err) { + klog.Infof("riccardo [upgrade to IC, phase2] IC config map not found") + } else { + return progressing, fmt.Errorf("could not delete interconnect configmap: %w", err) + } + */ + + } + // if node and master DSs are <= 4.13 (no IC support) and we're upgrading to >= 4.14 (IC), + // go through an intermediate step with IC single-zone DSs, tracked by a configmap that overrides + // the zone mode, created here by CNO. 
+ if ovn.MasterUpdateStatus != nil && ovn.NodeUpdateStatus != nil && + isVersionLessThanOrEqualTo(ovn.MasterUpdateStatus.Version, 4, 13) && + isVersionLessThanOrEqualTo(ovn.NodeUpdateStatus.Version, 4, 13) && + isVersionGreaterThanOrEqualTo(os.Getenv("RELEASE_VERSION"), 4, 14) && + !ovn.MasterUpdateStatus.Progressing && + !ovn.NodeUpdateStatus.Progressing && + !targetZoneMode.configMapFound { + + klog.Infof("riccardo: [upgrade phase1] 4.13->4.14 creating tmp configmap for single zone") + + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: util.OVN_INTERCONNECT_CONFIGMAP_NAME, + Namespace: util.OVN_NAMESPACE, + }, + Data: map[string]string{ + "zone-mode": fmt.Sprint(zoneModeSingleZone), + "temporary": "true", + "from-version": ovn.MasterUpdateStatus.Version, + }, + } + if err := client.ClientFor("").CRClient().Create(context.TODO(), configMap); err != nil { + return progressing, fmt.Errorf("could not create interconnect configmap: %w", err) + } + // TODO consider running getTargetInterConnectZoneMode again + targetZoneMode.configMapFound = true + targetZoneMode.zoneMode = zoneModeSingleZone + targetZoneMode.temporary = true + targetZoneMode.fromVersion = ovn.MasterUpdateStatus.Version + + progressing = true + + } else if ovn.MasterUpdateStatus != nil && ovn.NodeUpdateStatus != nil && + isVersionGreaterThanOrEqualTo(ovn.MasterUpdateStatus.Version, 4, 14) && + isVersionGreaterThanOrEqualTo(ovn.NodeUpdateStatus.Version, 4, 14) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeSingleZone) && + !ovn.MasterUpdateStatus.Progressing && + !ovn.NodeUpdateStatus.Progressing && + targetZoneMode.configMapFound && targetZoneMode.temporary { + + // if node and master DSs have already upgraded to >= 4.14 single zone and + // we previously pushed a configmap for temporary single-mode zone, + // remove the configmap and proceed with the change of zone mode to 
multizone. + klog.Infof("riccardo: [upgrade phase2] 4.13->4.14 patching tmp configmap for multizone") + + patch := []map[string]interface{}{ + { + "op": "replace", + "path": "/data/zone-mode", + "value": fmt.Sprint(zoneModeMultiZone), + }, + { + "op": "replace", + "path": "/data/temporary", + "value": "false", + }, + } + + patchBytes, err := json.Marshal(patch) + if err != nil { + return progressing, fmt.Errorf("could not marshal patch for interconnect configmap: %w", err) + } + newConfigMap, err := client.ClientFor("").Kubernetes().CoreV1().ConfigMaps(util.OVN_NAMESPACE).Patch( + context.TODO(), util.OVN_INTERCONNECT_CONFIGMAP_NAME, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) + if err != nil { + return progressing, fmt.Errorf("could not patch existing interconnect configmap: %w", err) + } + + klog.Infof("riccardo patched configmap: %+v", newConfigMap) + + // TODO consider running getTargetInterConnectZoneMode again + targetZoneMode.zoneMode = zoneModeMultiZone + targetZoneMode.temporary = false + + progressing = true + + } else if ovn.MasterUpdateStatus != nil && ovn.NodeUpdateStatus != nil && + // isVersionGreaterThanOrEqualTo(ovn.MasterUpdateStatus.Version, 4, 14) && // not sure if I should inspect the version yet.. + // isVersionGreaterThanOrEqualTo(ovn.NodeUpdateStatus.Version, 4, 14) && + ovn.MasterUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + ovn.NodeUpdateStatus.InterConnectZoneMode == string(zoneModeMultiZone) && + !ovn.MasterUpdateStatus.Progressing && + !ovn.NodeUpdateStatus.Progressing && + targetZoneMode.configMapFound && + targetZoneMode.zoneMode == zoneModeMultiZone && + !targetZoneMode.temporary { + + // phase 2 is over: daemonsets have rolled out in multizone mode + // Remove the configmap: this won't trigger any further roll out, but + // CNO will update the version it reports. 
+ klog.Infof("riccardo: deleting IC configmap; upgrade is done") + if err := client.Default().Kubernetes().CoreV1().ConfigMaps(util.OVN_NAMESPACE).Delete( + context.TODO(), util.OVN_INTERCONNECT_CONFIGMAP_NAME, metav1.DeleteOptions{}); err != nil { + if apierrors.IsNotFound(err) { + klog.Infof("riccardo [upgrade to IC, phase2] IC config map not found") + } else { + return progressing, fmt.Errorf("could not delete interconnect configmap: %w", err) + } + } + + // Once we're here, there are no more updates to the DSs and CNO won't update the version in its status... + // TODO AWFUL HACK! FIND A WAY AROUND THIS + return progressing, annotateMasterDaemonset(client.ClientFor("").Kubernetes()) + } + + return progressing, nil + +} + +// TODO: TEMPORARY HACK TO TRIGGER POD STATUS UPDATE SO THAT CNO STATUS REPORTS NEW VERSION +func annotateMasterDaemonset(kubeClient kubernetes.Interface) error { + masterDS, err := kubeClient.AppsV1().DaemonSets(util.OVN_NAMESPACE).Get(context.TODO(), "ovnkube-master", metav1.GetOptions{}) + if err != nil { + return err + } + + masterDS.Annotations["interconnect-upgrade"] = "done" + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + _, err = kubeClient.AppsV1().DaemonSets(util.OVN_NAMESPACE).Update(context.TODO(), masterDS, metav1.UpdateOptions{}) + return err + }); err != nil { + return err + } + return nil +} diff --git a/pkg/network/ovn_kubernetes_test.go b/pkg/network/ovn_kubernetes_test.go index aeecd45959..995ad86905 100644 --- a/pkg/network/ovn_kubernetes_test.go +++ b/pkg/network/ovn_kubernetes_test.go @@ -27,6 +27,7 @@ import ( operv1 "github.com/openshift/api/operator/v1" "github.com/openshift/cluster-network-operator/pkg/bootstrap" + cnofake "github.com/openshift/cluster-network-operator/pkg/client/fake" "github.com/openshift/cluster-network-operator/pkg/names" ) @@ -84,8 +85,9 @@ func TestRenderOVNKubernetes(t *testing.T) { }, }, } + fakeClient := cnofake.NewFakeClient() // TODO add a number of nodes - objs, 
_, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) g.Expect(err).NotTo(HaveOccurred()) g.Expect(objs).To(ContainElement(HaveKubernetesID("DaemonSet", "openshift-ovn-kubernetes", "ovnkube-node"))) g.Expect(objs).To(ContainElement(HaveKubernetesID("DaemonSet", "openshift-ovn-kubernetes", "ovnkube-master"))) @@ -140,7 +142,8 @@ func TestRenderOVNKubernetesIPv6(t *testing.T) { }, }, } - objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + fakeClient := cnofake.NewFakeClient() // TODO add a number of nodes + objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) g.Expect(err).NotTo(HaveOccurred()) script, err := findNBDBPostStart(objs) @@ -161,7 +164,7 @@ func TestRenderOVNKubernetesIPv6(t *testing.T) { }, }, } - objs, _, err = renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + objs, _, err = renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) g.Expect(err).NotTo(HaveOccurred()) script, err = findNBDBPostStart(objs) @@ -686,7 +689,8 @@ nodeport=true`, DisableUDPAggregation: tc.disableGRO, }, } - objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + fakeClient := cnofake.NewFakeClient() // TODO add a number of nodes + objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) g.Expect(err).NotTo(HaveOccurred()) confFile := extractOVNKubeConfig(g, objs) g.Expect(confFile).To(Equal(strings.TrimSpace(tc.expected))) @@ -1686,13 +1690,14 @@ metadata: PrePullerUpdateStatus: prepullerStatus, } - objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + fakeClient := cnofake.NewFakeClient() // TODO add a number of nodes + objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) g.Expect(err).NotTo(HaveOccurred()) renderedNode := findInObjs("apps", "DaemonSet", 
"ovnkube-node", "openshift-ovn-kubernetes", objs) _, preserveNode := renderedNode.GetAnnotations()[names.CreateOnlyAnnotation] - renderedMaster := findInObjs("apps", "DaemonSet", "ovnkube-master", "openshift-ovn-kubernetes", objs) - _, preserveMaster := renderedMaster.GetAnnotations()[names.CreateOnlyAnnotation] + renderedMaster := findInObjs("apps", "DaemonSet", "ovnkube-control-plane", "openshift-ovn-kubernetes", objs) + _, preserveMaster := renderedMaster.GetAnnotations()[names.CreateOnlyAnnotation] // TODO panic here renderedPrePuller := findInObjs("apps", "DaemonSet", "ovnkube-upgrades-prepuller", "openshift-ovn-kubernetes", objs) // if we expect a node update, the original node and the rendered one must be different @@ -1998,12 +2003,13 @@ func TestRenderOVNKubernetesDualStackPrecedenceOverUpgrade(t *testing.T) { // the new rendered config should hold the node to do the dualstack conversion // the upgrade code holds the masters to update the nodes first - objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + fakeClient := cnofake.NewFakeClient() // TODO add a number of nodes + objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) if err != nil { t.Errorf("Unexpected error: %v", err) } renderedNode := findInObjs("apps", "DaemonSet", "ovnkube-node", "openshift-ovn-kubernetes", objs) - renderedMaster := findInObjs("apps", "DaemonSet", "ovnkube-master", "openshift-ovn-kubernetes", objs) + renderedMaster := findInObjs("apps", "DaemonSet", "ovnkube-control-plane", "openshift-ovn-kubernetes", objs) // the node has to be the same if _, ok := renderedNode.GetAnnotations()[names.CreateOnlyAnnotation]; !ok { @@ -2091,7 +2097,8 @@ func TestRenderOVNKubernetesOVSFlowsConfigMap(t *testing.T) { }, FlowsConfig: tc.FlowsConfig, } - objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn) + fakeClient := cnofake.NewFakeClient() // TODO add a number of nodes + objs, _, err := 
renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient) g.Expect(err).ToNot(HaveOccurred()) nodeDS := findInObjs("apps", "DaemonSet", "ovnkube-node", "openshift-ovn-kubernetes", objs) ds := appsv1.DaemonSet{} diff --git a/pkg/network/render.go b/pkg/network/render.go index cb4c8b43f5..7174cf8dea 100644 --- a/pkg/network/render.go +++ b/pkg/network/render.go @@ -66,7 +66,7 @@ func Render(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.BootstrapResult objs = append(objs, o...) // render default network - o, progressing, err = renderDefaultNetwork(conf, bootstrapResult, manifestDir) + o, progressing, err = renderDefaultNetwork(conf, bootstrapResult, manifestDir, client) if err != nil { return nil, progressing, err } @@ -561,7 +561,7 @@ func validateMigration(conf *operv1.NetworkSpec) []error { // renderDefaultNetwork generates the manifests corresponding to the requested // default network -func renderDefaultNetwork(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.BootstrapResult, manifestDir string) ([]*uns.Unstructured, bool, error) { +func renderDefaultNetwork(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.BootstrapResult, manifestDir string, client cnoclient.Client) ([]*uns.Unstructured, bool, error) { dn := conf.DefaultNetwork if errs := validateDefaultNetwork(conf); len(errs) > 0 { return nil, false, errors.Errorf("invalid Default Network configuration: %v", errs) @@ -571,7 +571,7 @@ func renderDefaultNetwork(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.B case operv1.NetworkTypeOpenShiftSDN: return renderOpenShiftSDN(conf, bootstrapResult, manifestDir) case operv1.NetworkTypeOVNKubernetes: - return renderOVNKubernetes(conf, bootstrapResult, manifestDir) + return renderOVNKubernetes(conf, bootstrapResult, manifestDir, client) case operv1.NetworkTypeKuryr: return renderKuryr(conf, bootstrapResult, manifestDir) default: diff --git a/pkg/network/semver.go b/pkg/network/semver.go index b12099f793..e2509f9c32 100644 --- 
a/pkg/network/semver.go +++ b/pkg/network/semver.go @@ -49,3 +49,37 @@ func compareVersions(fromVersion, toVersion string) versionChange { return versionChange(v1.Compare(v2)) } + +func isVersionGreaterThanOrEqualTo(version string, major int, minor int) bool { + v, err := semver.NewVersion(version) + if err != nil { + klog.Errorf("failed to parse version %s: %v", version, err) + return false + } + // 4.14 vs 5.13 + // 4.14 vs 4.13 + // 4.14 vs 3.15 + if v.Major() > int64(major) { + return true + } else if v.Major() == int64(major) { + return v.Minor() >= int64(minor) + } else { + return false + } +} + +func isVersionLessThanOrEqualTo(version string, major int, minor int) bool { + v, err := semver.NewVersion(version) + if err != nil { + klog.Errorf("failed to parse version %s: %v", version, err) + return false + } + + if v.Major() < int64(major) { + return true + } else if v.Major() == int64(major) { + return v.Minor() <= int64(minor) + } else { + return false + } +} diff --git a/pkg/network/semver_test.go b/pkg/network/semver_test.go index 2f908b6d3a..a9c67d6d21 100644 --- a/pkg/network/semver_test.go +++ b/pkg/network/semver_test.go @@ -60,3 +60,57 @@ func TestDirection(t *testing.T) { }) } } + +func TestVersionComparison(t *testing.T) { + for idx, tc := range []struct { + version string + otherVersionMajor int + otherVersionMinor int + resultLessThanOrEqualTo bool + resultGreaterThanOrEqualTo bool + }{ + { + "4.14", + 4, 14, + true, // <= + true, // >= + }, + { + "4.14", + 4, 15, + true, // <= + false, // >= + }, + { + "4.14", + 4, 13, + false, // <= + true, // >= + }, + { + "4.14.0-0.ci.test-2023-06-14-124931-ci-ln-md7ivqb-latest", + 4, 14, + true, // <= + true, // >= + }, + { + "4.14.0-0.ci.test-2023-06-14-124931-ci-ln-md7ivqb-latest", + 4, 15, + true, // <= + false, // >= + }, + { + "4.14.0-0.ci.test-2023-06-14-124931-ci-ln-md7ivqb-latest", + 4, 13, + false, // <= + true, // >= + }, + } { + t.Run(strconv.Itoa(idx), func(t *testing.T) { + g := NewGomegaWithT(t) 
// OVN_INTERCONNECT_CONFIGMAP_NAME is the name of the ConfigMap that tracks the
// interconnect zone-mode override during the upgrade to interconnect.
const OVN_INTERCONNECT_CONFIGMAP_NAME = "ovn-interconnect-configuration"

// OVN_NAMESPACE is the namespace where the OVN-Kubernetes components run.
const OVN_NAMESPACE = "openshift-ovn-kubernetes"

// GetInterConnectConfigMap fetches the interconnect configuration ConfigMap
// from the OVN namespace. The caller is responsible for handling a NotFound
// error when the ConfigMap does not exist.
func GetInterConnectConfigMap(kubeClient kubernetes.Interface) (*corev1.ConfigMap, error) {
	return kubeClient.CoreV1().ConfigMaps(OVN_NAMESPACE).Get(context.TODO(), OVN_INTERCONNECT_CONFIGMAP_NAME, metav1.GetOptions{})
}