#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

# This script tests the kube-proxy image without actually using it as
# part of the infrastructure of a cluster. It is intended to be copied
# to the kubernetes-tests image for use in CI and should have no
# dependencies beyond oc and basic shell stuff.

# There is no good way to "properly" test the kube-proxy image in
# OpenShift CI, because it is only used as a dependency of third-party
# software (e.g. Calico); no fully-RH-supported configuration uses it.
#
# However, since we don't apply any kube-proxy-specific patches to our
# tree, we can assume that it *mostly* works, since we are building
# from sources that passed upstream testing. This script is just to
# confirm that our build is not somehow completely broken (e.g.
# immediate segfault due to a bad build environment).

if [[ -z "${KUBE_PROXY_IMAGE:-}" ]]; then
    echo "KUBE_PROXY_IMAGE not set" 1>&2
    exit 1
fi

TMPDIR=$(mktemp --tmpdir -d kube-proxy.XXXXXX)
function cleanup() {
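    # Best-effort teardown: the "|| true" on each delete keeps errexit from
    # aborting cleanup when an object was never created in the first place.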
    oc delete namespace kube-proxy-test || true
    oc delete clusterrole kube-proxy-test || true
    oc delete clusterrolebinding kube-proxy-test || true
    rm -rf "${TMPDIR}"
}
trap "cleanup" EXIT

function indent() {
    sed -e 's/^/  /' "$@"
    echo ""
}

# Decide what kube-proxy mode to use.
# (jsonpath expression copied from types_cluster_version.go)
OCP_VERSION=$(oc get clusterversion version -o jsonpath='{.status.history[?(@.state=="Completed")].version}')
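# (For reference: on a freshly installed cluster this is typically a single
# version string such as "4.18.3"; on an upgraded cluster the jsonpath can
# return several space-separated entries, most recent first.)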
case "${OCP_VERSION}" in
    4.17.*|4.18.*)
        # 4.17 and 4.18 always use RHEL 9 (and nftables mode was still
        # alpha in 4.17), so use iptables mode
        PROXY_MODE="iptables"
        ;;
    *)
        # 4.19 and later may use RHEL 10, so use nftables mode
        PROXY_MODE="nftables"
        ;;
esac

echo "Setting up Namespace and RBAC"
oc create -f - <<EOF
apiVersion: v1
kind: Namespace
metadata:
  name: kube-proxy-test
  labels:
    pod-security.kubernetes.io/enforce: privileged
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/warn: privileged
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-proxy-test
rules:
- apiGroups: [""]
  resources:
  - namespaces
  - endpoints
  - services
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups: ["discovery.k8s.io"]
  resources:
  - endpointslices
  verbs:
  - get
  - list
  - watch
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-proxy-test
  namespace: kube-proxy-test
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-proxy-test
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-proxy-test
subjects:
- kind: ServiceAccount
  name: kube-proxy-test
  namespace: kube-proxy-test
EOF
echo ""

# We run kube-proxy in a pod-network pod, so that it can create rules
# in that pod's network namespace without interfering with
# ovn-kubernetes in the host network namespace.
#
# We need to manually set all of the conntrack values to 0 so it won't
# try to set the sysctls (which would fail). This is the most fragile
# part of this script in terms of future compatibility. Likewise, we
# need to set .iptables.localhostNodePorts=false so it won't try to
# set the sysctl associated with that. (The nftables mode never tries
# to set that sysctl.)
oc create -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: config
  namespace: kube-proxy-test
data:
  kube-proxy-config.yaml: |-
    apiVersion: kubeproxy.config.k8s.io/v1alpha1
    kind: KubeProxyConfiguration
    conntrack:
      maxPerCore: 0
      min: 0
      tcpCloseWaitTimeout: 0s
      tcpEstablishedTimeout: 0s
      udpStreamTimeout: 0s
      udpTimeout: 0s
    iptables:
      localhostNodePorts: false
    mode: ${PROXY_MODE}
EOF
echo "config is:"
oc get configmap -n kube-proxy-test config -o yaml | indent

# The --hostname-override is needed to fake out the node detection,
# since we aren't running in a host-network pod. (The fact that we're
# cheating here means we'll end up generating incorrect NodePort rules
# but that doesn't matter.)
oc create -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: kube-proxy
  namespace: kube-proxy-test
spec:
  containers:
  - name: kube-proxy
    image: ${KUBE_PROXY_IMAGE}
    command:
    - /bin/sh
    - -c
    - exec kube-proxy --hostname-override "\${NODENAME}" --config /config/kube-proxy-config.yaml -v 4
    env:
    - name: NODENAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    securityContext:
      privileged: true
    volumeMounts:
    - mountPath: /config
      name: config
      readOnly: true
  serviceAccountName: kube-proxy-test
  volumes:
  - name: config
    configMap:
      name: config
EOF
echo "pod is:"
oc get pod -n kube-proxy-test kube-proxy -o yaml | indent
oc wait --for=condition=Ready -n kube-proxy-test pod/kube-proxy

echo "Waiting for kube-proxy to program initial ${PROXY_MODE} rules..."
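# kube-proxy serves Prometheus metrics on 127.0.0.1:10249 inside the pod;
# once kubeproxy_sync_proxy_rules_duration_seconds_count is non-zero, it has
# written its ruleset at least once.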
function kube_proxy_synced() {
    oc exec -n kube-proxy-test kube-proxy -- curl -s http://127.0.0.1:10249/metrics > "${TMPDIR}/metrics.txt"
    grep -q '^kubeproxy_sync_proxy_rules_duration_seconds_count [^0]' "${TMPDIR}/metrics.txt"
}
synced=false
for count in $(seq 1 10); do
    date
    if kube_proxy_synced; then
        synced=true
        break
    fi
    sleep 5
done
date
if [[ "${synced}" != true ]]; then
    echo "kube-proxy failed to program initial ${PROXY_MODE} rules:"
    oc logs -n kube-proxy-test kube-proxy |& indent

    echo "last-seen metrics:"
    indent "${TMPDIR}/metrics.txt"

    exit 1
fi

# Dump the ruleset; since RHEL 9 uses iptables-nft, kube-proxy's rules
# will show up in the nft ruleset regardless of whether kube-proxy is
# using iptables or nftables.
echo "Dumping rules"
oc exec -n kube-proxy-test kube-proxy -- nft list ruleset >& "${TMPDIR}/nft.out"

# We don't want to hardcode any assumptions about what kube-proxy's
# rules look like, but it necessarily must be the case that every
# clusterIP appears somewhere in the output. (We could look for
# endpoint IPs too, but that's more racy if there's any chance the
# cluster could be changing.)
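# (Purely illustrative: depending on proxy mode and nft version, a matching
# rule in the dump might look roughly like
#   ip daddr 172.30.0.10 tcp dport 53 counter packets 0 bytes 0 jump ...
# The check below only assumes the IP appears space-delimited somewhere.)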
exitcode=0
for service in kubernetes.default dns-default.openshift-dns router-default.openshift-ingress; do
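    # e.g. "dns-default.openshift-dns" -> name="dns-default", namespace="openshift-dns"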
    name="${service%.*}"
    namespace="${service#*.}"
    clusterIP="$(oc get service -n "${namespace}" "${name}" -o jsonpath='{.spec.clusterIP}')"
    echo "Looking for ${service} cluster IP (${clusterIP}) in ruleset"
    for ip in ${clusterIP}; do
        if ! grep --quiet --fixed-strings " ${ip} " "${TMPDIR}/nft.out"; then
            echo "Did not find IP ${ip} (from service ${name} in namespace ${namespace}) in ruleset" 1>&2
            exitcode=1
        fi
    done
done
echo ""

if [[ "${exitcode}" == 1 ]]; then
    echo "Ruleset was:"
    indent "${TMPDIR}/nft.out"

    echo "kube-proxy logs:"
    oc logs -n kube-proxy-test kube-proxy |& indent
fi

exit "${exitcode}"