diff --git a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-cluster-manager.yaml b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-cluster-manager.yaml new file mode 100644 index 0000000000..70ffc71cd0 --- /dev/null +++ b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-cluster-manager.yaml @@ -0,0 +1,232 @@ +# The ovnkube control-plane components +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-control-plane + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes controller (master) networking components. + release.openshift.io/version: "{{.ReleaseVersion}}" +spec: + selector: + matchLabels: + app: ovnkube-control-plane + updateStrategy: + type: RollingUpdate + rollingUpdate: + # by default, Deployments spin up the new pod before terminating the old one + # but we don't want that - because ovsdb holds the lock. + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-control-plane + ovn-db-pod: "true" + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + serviceAccountName: ovn-kubernetes-controller + hostNetwork: true + dnsPolicy: Default + priorityClassName: "system-cluster-critical" + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /var/lib/ovn/etc - ovsdb data + # /var/lib/openvswitch -> /var/lib/ovn/data - ovsdb pki state + # /run/openvswitch -> tmpfs - sockets + # /env -> configmap env-overrides - debug overrides + containers: + - name: kube-rbac-proxy + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. 
+ # The service is created in monitor.yaml and this is created in sdn.yaml. + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-master-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-master-metrics-cert not mounted. Waiting 20 minutes. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-master-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9106 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29104/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9106 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-master-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + + # ovnkube master: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-control-plane + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_master" ]]; then + set -o allexport + source "/env/_master" + set +o allexport + fi + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
+ exit 1 + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-control-plane - start ovnkube --init-master ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-cluster-manager "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --ovn-empty-lb-events \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --metrics-bind-address "127.0.0.1:29104" \ + --metrics-enable-pprof \ + --metrics-enable-config-duration \ + ${gateway_mode_flags} \ + --sb-address "{{.OVN_SB_DB_LIST}}" \ + --sb-client-privkey /ovn-cert/tls.key \ + --sb-client-cert /ovn-cert/tls.crt \ + --sb-client-cacert /ovn-ca/ca-bundle.crt \ + --sb-cert-common-name "{{.OVN_CERT_CN}}" \ + --nb-address "{{.OVN_NB_DB_LIST}}" \ + --nb-client-privkey /ovn-cert/tls.key \ + --nb-client-cert /ovn-cert/tls.crt \ + --nb-client-cacert /ovn-ca/ca-bundle.crt \ + --nb-cert-common-name "{{.OVN_CERT_CN}}" \ + --enable-multicast \ + --disable-snat-multiple-gws \ + ${multi_network_enabled_flag} \ + --enable-interconnect \ + --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - name: metrics-port + containerPort: 29104 + 
terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + node-role.kubernetes.io/master: "" + beta.kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + - name: etc-openvswitch + hostPath: + path: /var/lib/ovn/etc + - name: var-lib-openvswitch + hostPath: + path: /var/lib/ovn/data + - name: run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + - name: ovn-ca + configMap: + name: ovn-ca + - name: ovn-cert + secret: + secretName: ovn-cert + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + optional: true + tolerations: + - key: "node-role.kubernetes.io/master" + operator: "Exists" + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + - key: "node.kubernetes.io/network-unavailable" + operator: "Exists" diff --git a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-local.yaml similarity index 57% rename from bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml rename to bindata/network/ovn-kubernetes/self-hosted/ovnkube-local.yaml index 3bd2460d2e..57ccd6f1c2 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-local.yaml @@ -1,14 +1,7 @@ ---- kind: DaemonSet apiVersion: apps/v1 metadata: - {{ if eq .OVN_NODE_MODE "dpu-host" }} - name: ovnkube-node-dpu-host - {{ else if eq .OVN_NODE_MODE "smart-nic" }} - name: ovnkube-node-smart-nic - {{ else }} - name: ovnkube-node - {{ end }} + name: ovnkube namespace: openshift-ovn-kubernetes annotations: kubernetes.io/description: | @@ -17,13 +10,7 @@ metadata: spec: selector: matchLabels: - {{ 
if eq .OVN_NODE_MODE "dpu-host" }} - app: ovnkube-node-dpu-host - {{ else if eq .OVN_NODE_MODE "smart-nic" }} - app: ovnkube-node-smart-nic - {{ else }} - app: ovnkube-node - {{ end }} + app: ovnkube updateStrategy: type: RollingUpdate rollingUpdate: @@ -33,13 +20,7 @@ spec: annotations: target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' labels: - {{ if eq .OVN_NODE_MODE "dpu-host" }} - app: ovnkube-node-dpu-host - {{ else if eq .OVN_NODE_MODE "smart-nic" }} - app: ovnkube-node-smart-nic - {{ else }} - app: ovnkube-node - {{ end }} + app: ovnkube component: network type: infra openshift.io/component: network @@ -50,31 +31,15 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - {{ if .DpuHostModeLabel }} - - key: {{ .DpuHostModeLabel }} + - key: network.operator.openshift.io/dpu-host {{ if eq .OVN_NODE_MODE "dpu-host" }} operator: Exists - {{ else if eq .OVN_NODE_MODE "smart-nic" }} - operator: DoesNotExist - {{ else }} - operator: DoesNotExist - {{ end }} - {{ end }} - {{ if .SmartNicModeLabel }} - - key: {{ .SmartNicModeLabel }} - {{ if eq .OVN_NODE_MODE "dpu-host" }} - operator: DoesNotExist - {{ else if eq .OVN_NODE_MODE "smart-nic" }} - operator: Exists {{ else }} operator: DoesNotExist {{ end }} - {{ end }} - {{ if .DpuModeLabel }} - - key: {{ .DpuModeLabel }} + - key: network.operator.openshift.io/dpu operator: DoesNotExist - {{ end }} - serviceAccountName: ovn-kubernetes-node + serviceAccountName: ovn-kubernetes-controller hostNetwork: true dnsPolicy: Default hostPID: true @@ -86,7 +51,7 @@ spec: # /run/openvswitch -> tmpfs - ovsdb sockets # /env -> configmap env-overrides - debug overrides containers: - {{ if or (eq .OVN_NODE_MODE "full") (eq .OVN_NODE_MODE "smart-nic") }} + {{ if eq .OVN_NODE_MODE "full" }} # ovn-controller: programs the vswitch with flows from the sbdb - name: ovn-controller image: "{{.OvnImage}}" @@ -99,8 +64,8 @@ spec: set -o allexport source "/env/${K8S_NODE}" 
set +o allexport - fi - + fi + echo "$(date -Iseconds) - starting ovn-controller" exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \ --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \ @@ -156,7 +121,7 @@ spec: set -euo pipefail # Rotate audit log files when then get to max size (in bytes) - MAXFILESIZE=$(( "{{.OVNPolicyAuditMaxFileSize}}"*1000000 )) + MAXFILESIZE=$(( "{{.OVNPolicyAuditMaxFileSize}}"*1000000 )) LOGFILE=/var/log/ovn/acl-audit-log.log CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) @@ -165,14 +130,14 @@ spec: while true do - # Make sure ovn-controller's logfile exists, and get current size in bytes - if [ -f "$LOGFILE" ]; then + # Make sure ovn-controller's logfile exists, and get current size in bytes + if [ -f "$LOGFILE" ]; then file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` - else + else ovs-appctl -t /var/run/ovn/ovn-controller.${CONTROLLERPID}.ctl vlog/reopen file_size=`du -b ${LOGFILE} | tr -s '\t' ' ' | cut -d' ' -f1` - fi - + fi + if [ $file_size -gt $MAXFILESIZE ];then echo "Rotating OVN ACL Log File" timestamp=`date '+%Y-%m-%dT%H-%M-%S'` @@ -181,8 +146,8 @@ spec: CONTROLLERPID=$(cat /run/ovn/ovn-controller.pid) fi - # sleep for 30 seconds to avoid wasting CPU - sleep 30 + # sleep for 30 seconds to avoid wasting CPU + sleep 30 done resources: requests: @@ -195,7 +160,7 @@ spec: - mountPath: /run/ovn/ name: run-ovn {{ end }} - - name: kube-rbac-proxy + - name: kube-rbac-proxy-node image: {{.KubeRBACProxyImage}} command: - /bin/bash @@ -212,7 +177,7 @@ spec: TS=$(date +%s) WARN_TS=$(( ${TS} + $(( 20 * 60)) )) HAS_LOGGED_INFO=0 - + log_missing_certs(){ CUR_TS=$(date +%s) if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then @@ -226,7 +191,7 @@ spec: log_missing_certs sleep 5 done - + echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy exec /usr/bin/kube-rbac-proxy \ --logtostderr \ @@ -299,6 +264,441 @@ spec: - name: ovn-node-metrics-cert mountPath: /etc/pki/tls/metrics-cert readOnly: True 
+ # ovn-northd: convert network objects in nbdb to flows in sbdb + - name: northd + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping ovn-northd" + OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + echo "$(date -Iseconds) - ovn-northd stopped" + rm -f /var/run/ovn/ovn-northd.pid + exit 0 + } + # end of quit + trap quit TERM INT + + echo "$(date -Iseconds) - starting ovn-northd" + exec ovn-northd \ + --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + --pidfile /var/run/ovn/ovn-northd.pid & + + wait $! + lifecycle: + preStop: + exec: + command: + - OVN_MANAGE_OVSDB=no + - /usr/share/ovn/scripts/ovn-ctl + - stop_northd + env: + - name: OVN_LOG_LEVEL + value: info + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert # not needed, but useful when exec'ing in to pod. + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + # nbdb: the northbound, or logical network object DB. In standalone mode listening on unix socket. 
+ - name: nbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping nbdb" + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb + echo "$(date -Iseconds) - nbdb stopped" + rm -f /var/run/ovn/ovnnb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + + # checks if a db pod is part of a current cluster + + OVN_ARGS="--no-monitor" + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_nb_ovsdb & + + wait $! + + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + rm -f /var/run/ovn/ovnnb_db.pid + #configure northd_probe_interval + OVN_NB_CTL="ovn-nbctl" + + echo "Setting the IC zone to ${K8S_NODE}" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 10 ]]; do + current_probe_interval=$(${OVN_NB_CTL} set NB_Global . name="${K8S_NODE}" options:name="${K8S_NODE}") + if [[ $? == 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-5000} + echo "Setting northd probe interval to ${northd_probe_interval} ms" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 10 ]]; do + current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval) + if [[ $? == 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then + retries=0 + while [[ "${retries}" -lt 10 ]]; do + ${OVN_NB_CTL} set NB_GLOBAL . 
options:northd_probe_interval=${northd_probe_interval} + if [[ $? != 0 ]]; then + echo "Failed to set northd probe interval to ${northd_probe_interval}. retrying....." + sleep 2 + (( retries += 1 )) + else + echo "Successfully set northd probe interval to ${northd_probe_interval} ms" + break + fi + done + fi + + preStop: + exec: + command: + - /usr/bin/ovn-appctl + - -t + - /var/run/ovn/ovnnb_db.ctl + - exit + readinessProbe: +{{ if not .IsSNO }} + initialDelaySeconds: 90 +{{ end }} + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=3 ovsdb-server/sync-status 2>/dev/null | { grep "state: active" || false; }) + if [[ -z "${status}" ]]; then + echo "NB DB is not running or active." + exit 1 + fi + + env: + - name: OVN_LOG_LEVEL + value: info + - name: OVN_NORTHD_PROBE_INTERVAL + value: "{{.OVN_NORTHD_PROBE_INTERVAL}}" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + # sbdb: the southbound, or logical network object DB. 
In standalone mode listening on unix socket + - name: sbdb + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + + OVN_ARGS="--no-monitor" + exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ + run_sb_ovsdb & + + wait $! + + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + rm -f /var/run/ovn/ovnsb_db.pid + + preStop: + exec: + command: + - /usr/bin/ovn-appctl + - -t + - /var/run/ovn/ovnsb_db.ctl + - exit + readinessProbe: +{{ if not .IsSNO }} + initialDelaySeconds: 90 +{{ end }} + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=3 ovsdb-server/sync-status 2>/dev/null | { grep "state: active" || false; }) + if [[ -z "${status}" ]]; then + echo "SB DB is not running or active." 
+ exit 1 + fi + + env: + - name: OVN_LOG_LEVEL + value: info + volumeMounts: + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + terminationMessagePolicy: FallbackToLogsOnError + + - name: kube-rbac-proxy-ncm + image: {{.KubeRBACProxyImage}} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + TLS_PK=/etc/pki/tls/metrics-cert/tls.key + TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt + # As the secret mount is optional we must wait for the files to be present. + # The service is created in monitor.yaml and this is created in sdn.yaml. + TS=$(date +%s) + WARN_TS=$(( ${TS} + $(( 20 * 60)) )) + HAS_LOGGED_INFO=0 + + log_missing_certs(){ + CUR_TS=$(date +%s) + if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then + echo $(date -Iseconds) WARN: ovn-master-metrics-cert not mounted after 20 minutes. + elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then + echo $(date -Iseconds) INFO: ovn-master-metrics-cert not mounted. Waiting 20 minutes. + HAS_LOGGED_INFO=1 + fi + } + while [[ ! -f "${TLS_PK}" || ! 
-f "${TLS_CERT}" ]] ; do + log_missing_certs + sleep 5 + done + + echo $(date -Iseconds) INFO: ovn-master-metrics-certs mounted, starting kube-rbac-proxy + exec /usr/bin/kube-rbac-proxy \ + --logtostderr \ + --secure-listen-address=:9102 \ + --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ + --upstream=http://127.0.0.1:29102/ \ + --tls-private-key-file=${TLS_PK} \ + --tls-cert-file=${TLS_CERT} + ports: + - containerPort: 9102 + name: https + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: ovn-master-metrics-cert + mountPath: /etc/pki/tls/metrics-cert + readOnly: True + + # ovnkube local: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-controller + image: "{{.OvnImage}}" + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_local" ]]; then + set -o allexport + source "/env/_local" + set +o allexport + fi + + if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then + gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" + elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + else + echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
+ exit 1 + fi + + multi_network_enabled_flag= + if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then + multi_network_enabled_flag="--enable-multi-network" + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-controller - start ovnkube --init-network-controller-manager ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-network-controller-manager "${K8S_NODE}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --ovn-empty-lb-events \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + --metrics-bind-address "127.0.0.1:29102" \ + --metrics-enable-pprof \ + --metrics-enable-config-duration \ + ${gateway_mode_flags} \ + --disable-snat-multiple-gws \ + ${multi_network_enabled_flag} \ + --enable-multicast \ + --zone ${K8S_NODE} \ + --enable-interconnect \ + --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + - mountPath: /etc/openvswitch/ + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch/ + name: var-lib-openvswitch + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: /env + name: env-overrides + - mountPath: /ovn-cert + name: ovn-cert + - mountPath: /ovn-ca + name: ovn-ca + resources: + requests: + cpu: 10m + memory: 300Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - name: metrics-port + containerPort: 29102 + terminationMessagePolicy: FallbackToLogsOnError + # ovnkube-node: does node-level bookkeeping and configuration - name: ovnkube-node image: "{{.OvnImage}}" @@ -359,31 +759,13 @@ spec: if [[ -n "${OVNKUBE_NODE_MGMT_PORT_NETDEV}" ]] ; then node_mgmt_port_netdev_flags="--ovnkube-node-mgmt-port-netdev ${OVNKUBE_NODE_MGMT_PORT_NETDEV}" fi - if [[ -n 
"${OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME}" ]] ; then - node_mgmt_port_netdev_flags="$node_mgmt_port_netdev_flags --ovnkube-node-mgmt-port-dp-resource-name ${OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME}" - fi multi_network_enabled_flag= if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then multi_network_enabled_flag="--enable-multi-network" fi - multi_network_policy_enabled_flag= - if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then - multi_network_policy_enabled_flag="--enable-multi-networkpolicy" - fi - exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \ - --nb-address "{{.OVN_NB_DB_LIST}}" \ - --sb-address "{{.OVN_SB_DB_LIST}}" \ - --nb-client-privkey /ovn-cert/tls.key \ - --nb-client-cert /ovn-cert/tls.crt \ - --nb-client-cacert /ovn-ca/ca-bundle.crt \ - --nb-cert-common-name "{{.OVN_CERT_CN}}" \ - --sb-client-privkey /ovn-cert/tls.key \ - --sb-client-cert /ovn-cert/tls.crt \ - --sb-client-cacert /ovn-ca/ca-bundle.crt \ - --sb-cert-common-name "{{.OVN_CERT_CN}}" \ --config-file=/run/ovnkube-config/ovnkube.conf \ --loglevel "${OVN_KUBE_LOG_LEVEL}" \ --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \ @@ -399,7 +781,8 @@ spec: --disable-snat-multiple-gws \ ${export_network_flows_flags} \ ${multi_network_enabled_flag} \ - ${multi_network_policy_enabled_flag} \ + --zone ${K8S_NODE} \ + --enable-interconnect \ ${gw_interface_flag} env: # for kubectl @@ -435,10 +818,6 @@ spec: - name: IPFIX_SAMPLING value: "{{.IPFIXSampling}}" {{ end }} - {{ if and (.MgmtPortResourceName) (or (eq .OVN_NODE_MODE "smart-nic") (eq .OVN_NODE_MODE "dpu-host")) }} - - name: OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME - value: {{ .MgmtPortResourceName }} - {{ end }} - name: K8S_NODE valueFrom: fieldRef: @@ -503,13 +882,6 @@ spec: requests: cpu: 10m memory: 300Mi - {{ if and (.MgmtPortResourceName) (or (eq .OVN_NODE_MODE "smart-nic") (eq .OVN_NODE_MODE "dpu-host")) }} - {{ .MgmtPortResourceName }}: '1' - {{ end }} - {{ if and (.MgmtPortResourceName) (or (eq .OVN_NODE_MODE 
"smart-nic") (eq .OVN_NODE_MODE "dpu-host")) }} - limits: - {{ .MgmtPortResourceName }}: '1' - {{ end }} lifecycle: preStop: exec: @@ -606,20 +978,20 @@ spec: path: /var/lib/openvswitch/data - name: etc-openvswitch hostPath: - path: /etc/openvswitch + path: /var/lib/ovnic/etc - name: run-openvswitch hostPath: path: /var/run/openvswitch - name: run-ovn hostPath: path: /var/run/ovn - {{ if or (eq .OVN_NODE_MODE "full") (eq .OVN_NODE_MODE "smart-nic") }} - # Used for placement of ACL audit logs + {{ if eq .OVN_NODE_MODE "full" }} + # Used for placement of ACL audit logs - name: node-log - hostPath: + hostPath: path: /var/log/ovn - name: log-socket - hostPath: + hostPath: path: /dev/log {{ end }} # For CNI server @@ -652,5 +1024,9 @@ spec: secret: secretName: ovn-node-metrics-cert optional: true + - name: ovn-master-metrics-cert + secret: + secretName: ovn-master-metrics-cert + optional: true tolerations: - operator: "Exists" diff --git a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-master.yaml b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-master.yaml deleted file mode 100644 index 106301052a..0000000000 --- a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-master.yaml +++ /dev/null @@ -1,1021 +0,0 @@ -# The ovnkube control-plane components - -{{ if not .IsSNO }} -# The pod disruption budget ensures that we keep a raft quorum -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: ovn-raft-quorum-guard - namespace: openshift-ovn-kubernetes -spec: - minAvailable: {{.OVN_MIN_AVAILABLE}} - selector: - matchLabels: - app: ovnkube-master ---- -{{ end }} -kind: DaemonSet -apiVersion: apps/v1 -metadata: - name: ovnkube-master - namespace: openshift-ovn-kubernetes - annotations: - kubernetes.io/description: | - This daemonset launches the ovn-kubernetes controller (master) networking components. 
- release.openshift.io/version: "{{.ReleaseVersion}}" -spec: - selector: - matchLabels: - app: ovnkube-master - updateStrategy: - type: RollingUpdate - rollingUpdate: - # by default, Deployments spin up the new pod before terminating the old one - # but we don't want that - because ovsdb holds the lock. - maxSurge: 0 - maxUnavailable: 1 - template: - metadata: - annotations: - target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' - labels: - app: ovnkube-master - ovn-db-pod: "true" - component: network - type: infra - openshift.io/component: network - kubernetes.io/os: "linux" - spec: - serviceAccountName: ovn-kubernetes-controller - hostNetwork: true - dnsPolicy: Default - priorityClassName: "system-cluster-critical" - # volumes in all containers: - # (container) -> (host) - # /etc/openvswitch -> /var/lib/ovn/etc - ovsdb data - # /var/lib/openvswitch -> /var/lib/ovn/data - ovsdb pki state - # /run/openvswitch -> tmpfs - sockets - # /env -> configmap env-overrides - debug overrides - containers: - # ovn-northd: convert network objects in nbdb to flows in sbdb - - name: northd - image: "{{.OvnImage}}" - command: - - /bin/bash - - -c - - | - set -xem - if [[ -f /env/_master ]]; then - set -o allexport - source /env/_master - set +o allexport - fi - - quit() { - echo "$(date -Iseconds) - stopping ovn-northd" - OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd - echo "$(date -Iseconds) - ovn-northd stopped" - rm -f /var/run/ovn/ovn-northd.pid - exit 0 - } - # end of quit - trap quit TERM INT - - echo "$(date -Iseconds) - starting ovn-northd" - exec ovn-northd \ - --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - --ovnnb-db "{{.OVN_NB_DB_LIST}}" \ - --ovnsb-db "{{.OVN_SB_DB_LIST}}" \ - --pidfile /var/run/ovn/ovn-northd.pid \ - --n-threads={{.NorthdThreads}} \ - -p /ovn-cert/tls.key \ - -c /ovn-cert/tls.crt \ - -C /ovn-ca/ca-bundle.crt & - - wait $! 
- lifecycle: - preStop: - exec: - command: - - /bin/bash - - -c - - OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd - env: - - name: OVN_LOG_LEVEL - value: info - volumeMounts: - - mountPath: /etc/openvswitch/ - name: etc-openvswitch - - mountPath: /var/lib/openvswitch/ - name: var-lib-openvswitch - - mountPath: /run/openvswitch/ - name: run-openvswitch - - mountPath: /run/ovn/ - name: run-ovn - - mountPath: /env - name: env-overrides - - mountPath: /ovn-cert # not needed, but useful when exec'ing in to pod. - name: ovn-cert - - mountPath: /ovn-ca - name: ovn-ca - resources: - requests: - cpu: 10m - memory: 300Mi - terminationMessagePolicy: FallbackToLogsOnError - - # nbdb: the northbound, or logical network object DB. In raft mode - - name: nbdb - image: "{{.OvnImage}}" - command: - - /bin/bash - - -c - - | - set -xem - if [[ -f /env/_master ]]; then - set -o allexport - source /env/_master - set +o allexport - fi - - quit() { - echo "$(date -Iseconds) - stopping nbdb" - /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb - echo "$(date -Iseconds) - nbdb stopped" - rm -f /var/run/ovn/ovnnb_db.pid - exit 0 - } - # end of quit - trap quit TERM INT - - bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } - # initialize variables - ovn_kubernetes_namespace=openshift-ovn-kubernetes - ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt" - transport="ssl" - ovn_raft_conn_ip_url_suffix="" - if [[ "${K8S_NODE_IP}" == *":"* ]]; then - ovn_raft_conn_ip_url_suffix=":[::]" - fi - db="nb" - db_port="{{.OVN_NB_PORT}}" - ovn_db_file="/etc/ovn/ovn${db}_db.db" - # checks if a db pod is part of a current cluster - db_part_of_cluster() { - local pod=${1} - local db=${2} - local port=${3} - echo "Checking if ${pod} is part of cluster" - # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed. 
- init_ip=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} ${pod} -o=jsonpath='{.status.podIP}') - if [[ $? != 0 ]]; then - echo "Unable to get ${pod} ip " - return 1 - fi - echo "Found ${pod} ip: $init_ip" - init_ip=$(bracketify $init_ip) - target=$(ovn-${db}ctl --timeout=5 --db=${transport}:${init_ip}:${port} ${ovndb_ctl_ssl_opts} \ - --data=bare --no-headings --columns=target list connection) - if [[ "${target}" != "p${transport}:${port}${ovn_raft_conn_ip_url_suffix}" ]]; then - echo "Unable to check correct target ${target} " - return 1 - fi - echo "${pod} is part of cluster" - return 0 - } - # end of db_part_of_cluster - - # Checks if cluster has already been initialized. - # If not it returns false and sets init_ip to CLUSTER_INITIATOR_IP - cluster_exists() { - local db=${1} - local port=${2} - # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed. - db_pods=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-\w+' | grep -v "metrics") - - for db_pod in $db_pods; do - if db_part_of_cluster $db_pod $db $port; then - echo "${db_pod} is part of current cluster with ip: ${init_ip}!" - return 0 - fi - done - # if we get here there is no cluster, set init_ip and get out - init_ip=$(bracketify $CLUSTER_INITIATOR_IP) - return 1 - } - # end of cluster_exists() - - # RAFT clusters need an odd number of members to achieve consensus. - # The CNO determines which members make up the cluster, so if this container - # is not supposed to be part of the cluster, wait forever doing nothing - # (instad of exiting and causing CrashLoopBackoffs for no reason). - if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - echo "$(date -Iseconds) - not selected as RAFT member; sleeping..." 
- sleep 1500d - exit 0 - fi - - OVN_ARGS="--db-nb-cluster-local-port={{.OVN_NB_RAFT_PORT}} \ - --db-nb-cluster-local-addr=$(bracketify ${K8S_NODE_IP}) \ - --no-monitor \ - --db-nb-cluster-local-proto=ssl \ - --ovn-nb-db-ssl-key=/ovn-cert/tls.key \ - --ovn-nb-db-ssl-cert=/ovn-cert/tls.crt \ - --ovn-nb-db-ssl-ca-cert=/ovn-ca/ca-bundle.crt" - - CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" - echo "$(date -Iseconds) - starting nbdb CLUSTER_INITIATOR_IP=${CLUSTER_INITIATOR_IP}, K8S_NODE_IP=${K8S_NODE_IP}" - initialize="false" - - if [[ ! -e ${ovn_db_file} ]]; then - initialize="true" - fi - - if [[ "${initialize}" == "true" ]]; then - # check to see if a cluster already exists. If it does, just join it. - counter=0 - cluster_found=false - while [ $counter -lt 5 ]; do - if cluster_exists ${db} ${db_port}; then - cluster_found=true - break - fi - sleep 1 - counter=$((counter+1)) - done - - if ${cluster_found}; then - echo "Cluster already exists for DB: ${db}" - # join existing cluster - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --db-nb-cluster-remote-port={{.OVN_NB_RAFT_PORT}} \ - --db-nb-cluster-remote-addr=${init_ip} \ - --db-nb-cluster-remote-proto=ssl \ - --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - run_nb_ovsdb & - - wait $! - else - # either we need to initialize a new cluster or wait for master to create it - if [[ "${K8S_NODE_IP}" == "${CLUSTER_INITIATOR_IP}" ]]; then - # set DB election timer at DB creation time if OVN supports it - election_timer= - if test -n "$(/usr/share/ovn/scripts/ovn-ctl --help 2>&1 | grep "\--db-nb-election-timer")"; then - election_timer="--db-nb-election-timer=$(({{.OVN_NB_RAFT_ELECTION_TIMER}}*1000))" - fi - - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - ${election_timer} \ - run_nb_ovsdb & - - wait $! 
- else - echo "Joining the nbdb cluster with init_ip=${init_ip}..." - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --db-nb-cluster-remote-port={{.OVN_NB_RAFT_PORT}} \ - --db-nb-cluster-remote-addr=${init_ip} \ - --db-nb-cluster-remote-proto=ssl \ - --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - run_nb_ovsdb & - - wait $! - fi - fi - else - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - run_nb_ovsdb & - - wait $! - fi - - lifecycle: - postStart: - exec: - command: - - /bin/bash - - -c - - | - set -x - CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" - rm -f /var/run/ovn/ovnnb_db.pid - - # exit early if this DB is not supposed to be part of the cluster - if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - exit 0 - fi - - # retry an operation a number of times, sleeping 2 seconds between each try - retry() { - local tries=${1} - local desc=${2} - local cmd=${3} - - local retries=0 - while ! ${cmd}; do - (( retries += 1 )) - if [[ "${retries}" -gt ${tries} ]]; then - echo "$(date -Iseconds) - ERROR - nbdb ${desc} - too many failed attempts, giving up" - return 1 - fi - echo "$(date -Iseconds) - WARN - nbdb ${desc} - failed try ${retries}, retrying..." - sleep 2 - done - echo "$(date -Iseconds) - INFO - nbdb ${desc} - success" - return 0 - } - - if [[ "${K8S_NODE_IP}" == "${CLUSTER_INITIATOR_IP}" ]]; then - echo "$(date -Iseconds) - nbdb - postStart - waiting for master to be selected" - - # set the connection and inactivity probe - if ! retry 60 "inactivity-probe" "ovn-nbctl --no-leader-only -t 5 set-connection pssl:{{.OVN_NB_PORT}}{{.LISTEN_DUAL_STACK}} -- set connection . inactivity_probe={{.OVN_NB_INACTIVITY_PROBE}}"; then - exit 1 - fi - - # Upgrade the db if required. 
- DB_SCHEMA="/usr/share/ovn/ovn-nb.ovsschema" - DB_SERVER="unix:/var/run/ovn/ovnnb_db.sock" - schema_name=$(ovsdb-tool schema-name $DB_SCHEMA) - db_version=$(ovsdb-client -t 10 get-schema-version "$DB_SERVER" "$schema_name") - target_version=$(ovsdb-tool schema-version "$DB_SCHEMA") - - if ovsdb-tool compare-versions "$db_version" == "$target_version"; then - : - elif ovsdb-tool compare-versions "$db_version" ">" "$target_version"; then - echo "Database $schema_name has newer schema version ($db_version) than our local schema ($target_version), possibly an upgrade is partially complete?" - else - echo "Upgrading database $schema_name from schema version $db_version to $target_version" - ovsdb-client -t 30 convert "$DB_SERVER" "$DB_SCHEMA" - fi - fi - - # read the current northd_probe_interval from the DB - OVN_NB_CTL="ovn-nbctl -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt --db "{{.OVN_NB_DB_LIST}}"" - northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-10000} - echo "Setting northd probe interval to ${northd_probe_interval} ms" - retries=0 - current_probe_interval=0 - while [[ "${retries}" -lt 20 ]]; do - current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval) - if [[ $? == 0 ]]; then - current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') - break - else - sleep 2 - (( retries += 1 )) - fi - done - - # ensure the northd_probe_interval is set to the configured value - if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then - if ! retry 20 "northd-probe" "${OVN_NB_CTL} set NB_GLOBAL . options:northd_probe_interval=${northd_probe_interval}"; then - exit 1 - fi - fi - - # Enable/disable IPsec - {{ if .OVNIPsecEnable }} - ipsec=true - {{ else }} - ipsec=false - {{ end }} - if ! retry 20 "ipsec" "${OVN_NB_CTL} set nb_global . 
ipsec=${ipsec}"; then - exit 1 - fi - preStop: - exec: - command: - - /bin/bash - - -c - - | - echo "$(date -Iseconds) - stopping nbdb" - /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb - echo "$(date -Iseconds) - nbdb stopped" - rm -f /var/run/ovn/ovnnb_db.pid - readinessProbe: -{{ if not .IsSNO }} - initialDelaySeconds: 90 -{{ end }} - timeoutSeconds: 5 - exec: - command: - - /bin/bash - - -c - - | - set -xeo pipefail - - # exit early if this DB is not supposed to be part of the cluster - if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - exit 0 - fi - - leader_status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=3 cluster/status OVN_Northbound 2>/dev/null | { grep "Leader: unknown" || true; }) - if [[ ! -z "${leader_status}" ]]; then - echo "NB DB Raft leader is unknown to the cluster node." - exit 1 - fi - # set trim-on-compaction - /usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=5 ovsdb-server/memory-trim-on-compaction on 2>/dev/null - env: - - name: OVN_LOG_LEVEL - value: info - - name: OVN_NORTHD_PROBE_INTERVAL - value: "{{.OVN_NORTHD_PROBE_INTERVAL}}" - - name: K8S_NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - volumeMounts: - - mountPath: /etc/openvswitch/ - name: etc-openvswitch - - mountPath: /etc/ovn/ - name: etc-openvswitch - - mountPath: /var/lib/openvswitch/ - name: var-lib-openvswitch - - mountPath: /run/openvswitch/ - name: run-openvswitch - - mountPath: /run/ovn/ - name: run-ovn - - mountPath: /env - name: env-overrides - - mountPath: /ovn-cert - name: ovn-cert - - mountPath: /ovn-ca - name: ovn-ca - resources: - requests: - cpu: 10m - memory: 300Mi - ports: - - name: nb-db-port - containerPort: {{.OVN_NB_PORT}} - - name: nb-db-raft-port - containerPort: {{.OVN_NB_RAFT_PORT}} - terminationMessagePolicy: FallbackToLogsOnError - - - name: kube-rbac-proxy - image: {{.KubeRBACProxyImage}} - command: - - /bin/bash - - -c - - | - 
#!/bin/bash - set -euo pipefail - TLS_PK=/etc/pki/tls/metrics-cert/tls.key - TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt - # As the secret mount is optional we must wait for the files to be present. - # The service is created in monitor.yaml and this is created in sdn.yaml. - TS=$(date +%s) - WARN_TS=$(( ${TS} + $(( 20 * 60)) )) - HAS_LOGGED_INFO=0 - - log_missing_certs(){ - CUR_TS=$(date +%s) - if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then - echo $(date -Iseconds) WARN: ovn-master-metrics-cert not mounted after 20 minutes. - elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then - echo $(date -Iseconds) INFO: ovn-master-metrics-cert not mounted. Waiting 20 minutes. - HAS_LOGGED_INFO=1 - fi - } - while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do - log_missing_certs - sleep 5 - done - - echo $(date -Iseconds) INFO: ovn-master-metrics-certs mounted, starting kube-rbac-proxy - exec /usr/bin/kube-rbac-proxy \ - --logtostderr \ - --secure-listen-address=:9102 \ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ - --upstream=http://127.0.0.1:29102/ \ - --tls-private-key-file=${TLS_PK} \ - --tls-cert-file=${TLS_CERT} - ports: - - containerPort: 9102 - name: https - resources: - requests: - cpu: 10m - memory: 20Mi - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - name: ovn-master-metrics-cert - mountPath: /etc/pki/tls/metrics-cert - readOnly: True - # sbdb: The southbound, or flow DB. 
In raft mode - - name: sbdb - image: "{{.OvnImage}}" - command: - - /bin/bash - - -c - - | - set -xm - if [[ -f /env/_master ]]; then - set -o allexport - source /env/_master - set +o allexport - fi - - quit() { - echo "$(date -Iseconds) - stopping sbdb" - /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb - echo "$(date -Iseconds) - sbdb stopped" - rm -f /var/run/ovn/ovnsb_db.pid - exit 0 - } - # end of quit - trap quit TERM INT - - bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } - - # initialize variables - ovn_kubernetes_namespace=openshift-ovn-kubernetes - ovndb_ctl_ssl_opts="-p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt" - transport="ssl" - ovn_raft_conn_ip_url_suffix="" - if [[ "${K8S_NODE_IP}" == *":"* ]]; then - ovn_raft_conn_ip_url_suffix=":[::]" - fi - db="sb" - db_port="{{.OVN_SB_PORT}}" - ovn_db_file="/etc/ovn/ovn${db}_db.db" - # checks if a db pod is part of a current cluster - db_part_of_cluster() { - local pod=${1} - local db=${2} - local port=${3} - echo "Checking if ${pod} is part of cluster" - # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed. - init_ip=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} ${pod} -o=jsonpath='{.status.podIP}') - if [[ $? != 0 ]]; then - echo "Unable to get ${pod} ip " - return 1 - fi - echo "Found ${pod} ip: $init_ip" - init_ip=$(bracketify $init_ip) - target=$(ovn-${db}ctl --timeout=5 --db=${transport}:${init_ip}:${port} ${ovndb_ctl_ssl_opts} \ - --data=bare --no-headings --columns=target list connection) - if [[ "${target}" != "p${transport}:${port}${ovn_raft_conn_ip_url_suffix}" ]]; then - echo "Unable to check correct target ${target} " - return 1 - fi - echo "${pod} is part of cluster" - return 0 - } - # end of db_part_of_cluster - - # Checks if cluster has already been initialized. 
- # If not it returns false and sets init_ip to CLUSTER_INITIATOR_IP - cluster_exists() { - local db=${1} - local port=${2} - # TODO: change to use '--request-timeout=5s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed. - db_pods=$(timeout 5 kubectl get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-master-\w+' | grep -v "metrics") - - for db_pod in $db_pods; do - if db_part_of_cluster $db_pod $db $port; then - echo "${db_pod} is part of current cluster with ip: ${init_ip}!" - return 0 - fi - done - # if we get here there is no cluster, set init_ip and get out - init_ip=$(bracketify $CLUSTER_INITIATOR_IP) - return 1 - } - # end of cluster_exists() - - # RAFT clusters need an odd number of members to achieve consensus. - # The CNO determines which members make up the cluster, so if this container - # is not supposed to be part of the cluster, wait forever doing nothing - # (instad of exiting and causing CrashLoopBackoffs for no reason). - if [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - echo "$(date -Iseconds) - not selected as RAFT member; sleeping..." - sleep 1500d - exit 0 - fi - - OVN_ARGS="--db-sb-cluster-local-port={{.OVN_SB_RAFT_PORT}} \ - --db-sb-cluster-local-addr=$(bracketify ${K8S_NODE_IP}) \ - --no-monitor \ - --db-sb-cluster-local-proto=ssl \ - --ovn-sb-db-ssl-key=/ovn-cert/tls.key \ - --ovn-sb-db-ssl-cert=/ovn-cert/tls.crt \ - --ovn-sb-db-ssl-ca-cert=/ovn-ca/ca-bundle.crt" - - CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" - echo "$(date -Iseconds) - starting sbdb CLUSTER_INITIATOR_IP=${CLUSTER_INITIATOR_IP}" - initialize="false" - - if [[ ! -e ${ovn_db_file} ]]; then - initialize="true" - fi - - if [[ "${initialize}" == "true" ]]; then - # check to see if a cluster already exists. If it does, just join it. 
- counter=0 - cluster_found=false - while [ $counter -lt 5 ]; do - if cluster_exists ${db} ${db_port}; then - cluster_found=true - break - fi - sleep 1 - counter=$((counter+1)) - done - - if ${cluster_found}; then - echo "Cluster already exists for DB: ${db}" - # join existing cluster - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --db-sb-cluster-remote-port={{.OVN_SB_RAFT_PORT}} \ - --db-sb-cluster-remote-addr=${init_ip} \ - --db-sb-cluster-remote-proto=ssl \ - --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - run_sb_ovsdb & - - wait $! - else - # either we need to initialize a new cluster or wait for master to create it - if [[ "${K8S_NODE_IP}" == "${CLUSTER_INITIATOR_IP}" ]]; then - # set DB election timer at DB creation time if OVN supports it - election_timer= - if test -n "$(/usr/share/ovn/scripts/ovn-ctl --help 2>&1 | grep "\--db-sb-election-timer")"; then - election_timer="--db-sb-election-timer=$(({{.OVN_SB_RAFT_ELECTION_TIMER}}*1000))" - fi - - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - ${election_timer} \ - run_sb_ovsdb & - - wait $! - else - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --db-sb-cluster-remote-port={{.OVN_SB_RAFT_PORT}} \ - --db-sb-cluster-remote-addr=${init_ip} \ - --db-sb-cluster-remote-proto=ssl \ - --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - run_sb_ovsdb & - - wait $! - fi - fi - else - exec /usr/share/ovn/scripts/ovn-ctl ${OVN_ARGS} \ - --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:{{.OVN_LOG_PATTERN_CONSOLE}}" \ - run_sb_ovsdb & - - wait $! 
- fi - lifecycle: - postStart: - exec: - command: - - /bin/bash - - -c - - | - set -x - CLUSTER_INITIATOR_IP="{{.OVN_DB_CLUSTER_INITIATOR}}" - rm -f /var/run/ovn/ovnsb_db.pid - - # exit early if this DB is not supposed to be part of the cluster - if [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - exit 0 - fi - - # retry an operation a number of times, sleeping 2 seconds between each try - retry() { - local tries=${1} - local desc=${2} - local cmd=${3} - - local retries=0 - while ! ${cmd}; do - (( retries += 1 )) - if [[ "${retries}" -gt ${tries} ]]; then - echo "$(date -Iseconds) - ERROR - nbdb ${desc} - too many failed attempts, giving up" - return 1 - fi - echo "$(date -Iseconds) - WARN - nbdb ${desc} - failed try ${retries}, retrying..." - sleep 2 - done - echo "$(date -Iseconds) - INFO - nbdb ${desc} - success" - return 0 - } - - if [[ "${K8S_NODE_IP}" == "${CLUSTER_INITIATOR_IP}" ]]; then - echo "$(date -Iseconds) - sdb - postStart - waiting for master to be selected" - - # set the connection and inactivity probe - if ! retry 60 "inactivity-probe" "ovn-sbctl --no-leader-only -t 5 set-connection pssl:{{.OVN_SB_PORT}}{{.LISTEN_DUAL_STACK}} -- set connection . inactivity_probe={{.OVN_CONTROLLER_INACTIVITY_PROBE}}"; then - exit 1 - fi - - # Upgrade the db if required. - DB_SCHEMA="/usr/share/ovn/ovn-sb.ovsschema" - DB_SERVER="unix:/var/run/ovn/ovnsb_db.sock" - schema_name=$(ovsdb-tool schema-name $DB_SCHEMA) - db_version=$(ovsdb-client -t 10 get-schema-version "$DB_SERVER" "$schema_name") - target_version=$(ovsdb-tool schema-version "$DB_SCHEMA") - - if ovsdb-tool compare-versions "$db_version" == "$target_version"; then - : - elif ovsdb-tool compare-versions "$db_version" ">" "$target_version"; then - echo "Database $schema_name has newer schema version ($db_version) than our local schema ($target_version), possibly an upgrade is partially complete?" 
- else - echo "Upgrading database $schema_name from schema version $db_version to $target_version" - ovsdb-client -t 30 convert "$DB_SERVER" "$DB_SCHEMA" - fi - fi - - # Kill some time while the cluster converges by checking IPsec status - OVN_SB_CTL="ovn-sbctl -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt --db "{{.OVN_SB_DB_LIST}}"" - if ! retry 20 "ipsec" "${OVN_SB_CTL} get sb_global . ipsec"; then - exit 1 - fi - preStop: - exec: - command: - - /bin/bash - - -c - - | - echo "$(date -Iseconds) - stopping sbdb" - /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb - echo "$(date -Iseconds) - sbdb stopped" - rm -f /var/run/ovn/ovnsb_db.pid - readinessProbe: -{{ if not .IsSNO }} - initialDelaySeconds: 90 -{{ end }} - timeoutSeconds: 5 - exec: - command: - - /bin/bash - - -c - - | - set -xeo pipefail - - # exit early if this DB is not supposed to be part of the cluster - if [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_SB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - exit 0 - fi - - leader_status=$(/usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=3 cluster/status OVN_Southbound 2>/dev/null | { grep "Leader: unknown" || true; }) - if [[ ! -z "${leader_status}" ]]; then - echo "SB DB Raft leader is unknown to the cluster node." 
- exit 1 - fi - # set trim-on-compaction - /usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=5 ovsdb-server/memory-trim-on-compaction on 2>/dev/null - env: - - name: OVN_LOG_LEVEL - value: info - - name: K8S_NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - volumeMounts: - - mountPath: /etc/openvswitch/ - name: etc-openvswitch - - mountPath: /etc/ovn/ - name: etc-openvswitch - - mountPath: /var/lib/openvswitch/ - name: var-lib-openvswitch - - mountPath: /run/openvswitch/ - name: run-openvswitch - - mountPath: /run/ovn/ - name: run-ovn - - mountPath: /env - name: env-overrides - - mountPath: /ovn-cert - name: ovn-cert - - mountPath: /ovn-ca - name: ovn-ca - ports: - - name: sb-db-port - containerPort: {{.OVN_SB_PORT}} - - name: sb-db-raft-port - containerPort: {{.OVN_SB_RAFT_PORT}} - resources: - requests: - cpu: 10m - memory: 300Mi - terminationMessagePolicy: FallbackToLogsOnError - - # ovnkube master: convert kubernetes objects in to nbdb logical network components - - name: ovnkube-master - image: "{{.OvnImage}}" - command: - - /bin/bash - - -c - - | - set -xe - if [[ -f "/env/_master" ]]; then - set -o allexport - source "/env/_master" - set +o allexport - fi - - if [ "{{.OVN_GATEWAY_MODE}}" == "shared" ]; then - gateway_mode_flags="--gateway-mode shared --gateway-interface br-ex" - elif [ "{{.OVN_GATEWAY_MODE}}" == "local" ]; then - gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" - else - echo "Invalid OVN_GATEWAY_MODE: \"{{.OVN_GATEWAY_MODE}}\". Must be \"local\" or \"shared\"." 
- exit 1 - fi - - multi_network_enabled_flag= - if [[ "{{.OVN_MULTI_NETWORK_ENABLE}}" == "true" ]]; then - multi_network_enabled_flag="--enable-multi-network" - fi - - multi_network_policy_enabled_flag= - if [[ "{{.OVN_MULTI_NETWORK_POLICY_ENABLE}}" == "true" ]]; then - multi_network_policy_enabled_flag="--enable-multi-networkpolicy" - fi - - echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-master - start ovnkube --init-master ${K8S_NODE}" - exec /usr/bin/ovnkube \ - --init-master "${K8S_NODE}" \ - --config-file=/run/ovnkube-config/ovnkube.conf \ - --ovn-empty-lb-events \ - --loglevel "${OVN_KUBE_LOG_LEVEL}" \ - --metrics-bind-address "127.0.0.1:29102" \ - --metrics-enable-pprof \ - --metrics-enable-config-duration \ - ${gateway_mode_flags} \ - --sb-address "{{.OVN_SB_DB_LIST}}" \ - --sb-client-privkey /ovn-cert/tls.key \ - --sb-client-cert /ovn-cert/tls.crt \ - --sb-client-cacert /ovn-ca/ca-bundle.crt \ - --sb-cert-common-name "{{.OVN_CERT_CN}}" \ - --nb-address "{{.OVN_NB_DB_LIST}}" \ - --nb-client-privkey /ovn-cert/tls.key \ - --nb-client-cert /ovn-cert/tls.crt \ - --nb-client-cacert /ovn-ca/ca-bundle.crt \ - --nb-cert-common-name "{{.OVN_CERT_CN}}" \ - --enable-multicast \ - --disable-snat-multiple-gws \ - ${multi_network_enabled_flag} \ - ${multi_network_policy_enabled_flag} \ - --acl-logging-rate-limit "{{.OVNPolicyAuditRateLimit}}" - volumeMounts: - # for checking ovs-configuration service - - mountPath: /etc/systemd/system - name: systemd-units - readOnly: true - - mountPath: /etc/openvswitch/ - name: etc-openvswitch - - mountPath: /etc/ovn/ - name: etc-openvswitch - - mountPath: /var/lib/openvswitch/ - name: var-lib-openvswitch - - mountPath: /run/openvswitch/ - name: run-openvswitch - - mountPath: /run/ovn/ - name: run-ovn - - mountPath: /run/ovnkube-config/ - name: ovnkube-config - - mountPath: /env - name: env-overrides - - mountPath: /ovn-cert - name: ovn-cert - - mountPath: /ovn-ca - name: ovn-ca - resources: - requests: - cpu: 10m - memory: 300Mi - env: 
- - name: OVN_KUBE_LOG_LEVEL - value: "4" - - name: K8S_NODE - valueFrom: - fieldRef: - fieldPath: spec.nodeName - ports: - - name: metrics-port - containerPort: 29102 - terminationMessagePolicy: FallbackToLogsOnError - # ovn-dbchecker: monitor clustered ovn databases for db health and stale raft members - - name: ovn-dbchecker - image: "{{.OvnImage}}" - command: - - /bin/bash - - -c - - | - set -xe - if [[ -f "/env/_master" ]]; then - set -o allexport - source "/env/_master" - set +o allexport - fi - - echo "I$(date "+%m%d %H:%M:%S.%N") - ovn-dbchecker - start ovn-dbchecker" - - # RAFT clusters need an odd number of members to achieve consensus. - # The CNO determines which members make up the cluster, so if this container - # is not supposed to be part of the cluster, wait forever doing nothing - # (instad of exiting and causing CrashLoopBackoffs for no reason). - if [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":${K8S_NODE_IP}:".* ]] && [[ ! "{{.OVN_NB_DB_LIST}}" =~ .*":[${K8S_NODE_IP}]:".* ]]; then - echo "$(date -Iseconds) - not selected as RAFT member; sleeping..." 
- sleep 1500d - exit 0 - fi - - exec /usr/bin/ovndbchecker \ - --config-file=/run/ovnkube-config/ovnkube.conf \ - --loglevel "${OVN_KUBE_LOG_LEVEL}" \ - --sb-address "{{.OVN_SB_DB_LIST}}" \ - --sb-client-privkey /ovn-cert/tls.key \ - --sb-client-cert /ovn-cert/tls.crt \ - --sb-client-cacert /ovn-ca/ca-bundle.crt \ - --sb-cert-common-name "{{.OVN_CERT_CN}}" \ - --sb-raft-election-timer "{{.OVN_SB_RAFT_ELECTION_TIMER}}" \ - --nb-address "{{.OVN_NB_DB_LIST}}" \ - --nb-client-privkey /ovn-cert/tls.key \ - --nb-client-cert /ovn-cert/tls.crt \ - --nb-client-cacert /ovn-ca/ca-bundle.crt \ - --nb-cert-common-name "{{.OVN_CERT_CN}}" \ - --nb-raft-election-timer "{{.OVN_NB_RAFT_ELECTION_TIMER}}" - volumeMounts: - - mountPath: /etc/openvswitch/ - name: etc-openvswitch - - mountPath: /etc/ovn/ - name: etc-openvswitch - - mountPath: /var/lib/openvswitch/ - name: var-lib-openvswitch - - mountPath: /run/openvswitch/ - name: run-openvswitch - - mountPath: /run/ovn/ - name: run-ovn - - mountPath: /run/ovnkube-config/ - name: ovnkube-config - - mountPath: /env - name: env-overrides - - mountPath: /ovn-cert - name: ovn-cert - - mountPath: /ovn-ca - name: ovn-ca - resources: - requests: - cpu: 10m - memory: 300Mi - env: - - name: OVN_KUBE_LOG_LEVEL - value: "4" - - name: K8S_NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - terminationMessagePolicy: FallbackToLogsOnError - nodeSelector: - node-role.kubernetes.io/master: "" - beta.kubernetes.io/os: "linux" - volumes: - # for checking ovs-configuration service - - name: systemd-units - hostPath: - path: /etc/systemd/system - - name: etc-openvswitch - hostPath: - path: /var/lib/ovn/etc - - name: var-lib-openvswitch - hostPath: - path: /var/lib/ovn/data - - name: run-openvswitch - hostPath: - path: /var/run/openvswitch - - name: run-ovn - hostPath: - path: /var/run/ovn - - name: ovnkube-config - configMap: - name: ovnkube-config - - name: env-overrides - configMap: - name: env-overrides - optional: true - - name: ovn-ca - 
configMap: - name: ovn-ca - - name: ovn-cert - secret: - secretName: ovn-cert - - name: ovn-master-metrics-cert - secret: - secretName: ovn-master-metrics-cert - optional: true - tolerations: - - key: "node-role.kubernetes.io/master" - operator: "Exists" - - key: "node.kubernetes.io/not-ready" - operator: "Exists" - - key: "node.kubernetes.io/unreachable" - operator: "Exists" - - key: "node.kubernetes.io/network-unavailable" - operator: "Exists" diff --git a/pkg/controller/statusmanager/pod_status.go b/pkg/controller/statusmanager/pod_status.go index c0717e52bb..8dc06aa7d6 100644 --- a/pkg/controller/statusmanager/pod_status.go +++ b/pkg/controller/statusmanager/pod_status.go @@ -102,7 +102,7 @@ func (status *StatusManager) SetFromPods() { if !isNonCritical(ds) { hung = append(hung, status.CheckCrashLoopBackOffPods(dsName, ds.Spec.Selector.MatchLabels, "DaemonSet")...) } - } else if ds.Status.NumberAvailable == 0 { // NOTE: update this if we ever expect empty (unscheduled) daemonsets ~cdc + } else if ds.Status.NumberAvailable == 0 && ds.Status.DesiredNumberScheduled > 0 { progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not yet scheduled on any nodes", dsName.String())) dsProgressing = true } else if ds.Generation > ds.Status.ObservedGeneration { diff --git a/pkg/network/ovn_kubernetes.go b/pkg/network/ovn_kubernetes.go index b8831cd86e..33fea657cb 100644 --- a/pkg/network/ovn_kubernetes.go +++ b/pkg/network/ovn_kubernetes.go @@ -103,7 +103,7 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo // render the manifests on disk data := render.MakeRenderData() data.Data["ReleaseVersion"] = os.Getenv("RELEASE_VERSION") - data.Data["OvnImage"] = os.Getenv("OVN_IMAGE") + data.Data["OvnImage"] = os.Getenv("OVN_IMAGE") data.Data["OvnkubeMasterReplicas"] = len(bootstrapResult.OVN.MasterAddresses) data.Data["KubeRBACProxyImage"] = os.Getenv("KUBE_RBAC_PROXY_IMAGE") 
data.Data["Socks5ProxyImage"] = os.Getenv("SOCKS5_PROXY_IMAGE")