From 30330bbc3d36994d0db8678ed1738217bb7a5f76 Mon Sep 17 00:00:00 2001 From: Shuyang Xin Date: Mon, 17 Oct 2022 23:23:03 +0800 Subject: [PATCH] Enable containerd CI pipeline for antrea windows Add e2e, conformance and networkpolicy tests for Windows containerd cluster. ginkgo:v2.1.6 kubernetes:v1.25 containerd:v1.6.6 Signed-off-by: Shuyang Xin --- ci/jenkins/test.sh | 221 +++++++++++++++++++++++++++++++++++-- test/e2e/traceflow_test.go | 6 + 2 files changed, 220 insertions(+), 7 deletions(-) diff --git a/ci/jenkins/test.sh b/ci/jenkins/test.sh index c6593299af6..de8691f7e4f 100755 --- a/ci/jenkins/test.sh +++ b/ci/jenkins/test.sh @@ -35,11 +35,15 @@ PROXY_ALL=false DEFAULT_IP_MODE="ipv4" IP_MODE="" K8S_VERSION="1.23.6-00" +WINDOWS_YAML="windows" +WIN_JUMPER="" WINDOWS_CONFORMANCE_FOCUS="\[sig-network\].+\[Conformance\]|\[sig-windows\]" WINDOWS_CONFORMANCE_SKIP="\[LinuxOnly\]|\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]|\[sig-cli\]|\[sig-storage\]|\[sig-auth\]|\[sig-api-machinery\]|\[sig-apps\]|\[sig-node\]|\[Privileged\]|should be able to change the type from|\[sig-network\] Services should be able to create a functioning NodePort service \[Conformance\]|Service endpoints latency should not be very high|should be able to create a functioning NodePort service for Windows" +WINDOWS_CONFORMANCE_CONTAINERD_SKIP="\[LinuxOnly\]|\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]|\[sig-cli\]|\[sig-storage\]|\[sig-auth\]|\[sig-api-machinery\]|\[sig-apps\]|\[sig-node\]|\[Privileged\]|should be able to change the type from|\[sig-network\] Services should be able to create a functioning NodePort service \[Conformance\]|Service endpoints latency should not be very high|should be able to create a functioning NodePort service for Windows|should provide DNS for pods for Hostname|should provide /etc/hosts entries for the cluster" WINDOWS_NETWORKPOLICY_FOCUS="\[Feature:NetworkPolicy\]" WINDOWS_NETWORKPOLICY_SKIP="SCTP" 
+WINDOWS_NETWORKPOLICY_CONTAINERD_SKIP="\[sig-storage\]|SCTP" CONFORMANCE_SKIP="\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]|\[sig-cli\]|\[sig-storage\]|\[sig-auth\]|\[sig-api-machinery\]|\[sig-apps\]|\[sig-node\]" NETWORKPOLICY_SKIP="should allow egress access to server in CIDR block|should enforce except clause while egress access to server in CIDR block" @@ -49,7 +53,7 @@ CLEAN_STALE_IMAGES="docker system prune --force --all --filter until=48h" CLEAN_STALE_IMAGES_CONTAINERD="crictl rmi --prune" _usage="Usage: $0 [--kubeconfig ] [--workdir ] - [--testcase ] + [--testcase ] Run K8s e2e community tests (Conformance & Network Policy) or Antrea e2e tests on a remote (Jenkins) Windows or Linux cluster. @@ -102,6 +106,10 @@ case $key in IP_MODE="$2" shift 2 ;; + --win-jumper) + WIN_JUMPER="$2" + shift 2 + ;; -h|--help) print_usage exit 0 @@ -229,9 +237,9 @@ function collect_windows_network_info_and_logs { function wait_for_antrea_windows_pods_ready { kubectl apply -f "${WORKDIR}/antrea.yml" if [[ "${PROXY_ALL}" == false ]]; then - kubectl apply -f "${WORKDIR}/kube-proxy-windows.yml" + kubectl apply -f "${WORKDIR}/kube-proxy-${WINDOWS_YAML}.yml" fi - kubectl apply -f "${WORKDIR}/antrea-windows.yml" + kubectl apply -f "${WORKDIR}/antrea-${WINDOWS_YAML}.yml" kubectl rollout restart deployment/coredns -n kube-system kubectl rollout status deployment/coredns -n kube-system kubectl rollout status deployment.apps/antrea-controller -n kube-system @@ -425,6 +433,160 @@ function deliver_antrea_windows { rm -f antrea-windows.tar.gz } +function deliver_antrea_windows_containerd { + echo "====== Cleanup Antrea Installation ======" + clean_up_one_ns "antrea-test" + kubectl delete -f ${WORKDIR}/antrea-windows-containerd.yml --ignore-not-found=true || true + kubectl delete -f ${WORKDIR}/kube-proxy-windows-containerd.yml --ignore-not-found=true || true + kubectl delete daemonset antrea-agent -n kube-system --ignore-not-found=true || true + kubectl delete -f 
${WORKDIR}/antrea.yml --ignore-not-found=true || true + + echo "====== Building Antrea for the Following Commit ======" + export GO111MODULE=on + export GOPATH=${WORKDIR}/go + export GOROOT=/usr/local/go + export GOCACHE=${WORKSPACE}/../gocache + export PATH=${GOROOT}/bin:$PATH + + git show --numstat + make clean + ${CLEAN_STALE_IMAGES_CONTAINERD} + chmod -R g-w build/images/ovs + chmod -R g-w build/images/base + DOCKER_REGISTRY="${DOCKER_REGISTRY}" ./hack/build-antrea-linux-all.sh --pull + if [[ "$TESTCASE" == "windows-networkpolicy-process" ]]; then + make windows-bin + fi + + echo "====== Delivering Antrea to all the Nodes ======" + export_govc_env_var + + # Enable verbose log for troubleshooting. + sed -i "s/--v=0/--v=4/g" build/yamls/antrea.yml build/yamls/antrea-windows-containerd.yml + + cp -f build/yamls/*.yml $WORKDIR + docker save -o antrea-ubuntu.tar antrea/antrea-ubuntu:latest + + echo "===== Pull necessary images on Control-Plane node =====" + harbor_images=("agnhost:2.13" "nginx:1.15-alpine") + antrea_images=("e2eteam/agnhost:2.13" "docker.io/library/nginx:1.15-alpine") + common_images=("k8s.gcr.io/e2e-test-images/agnhost:2.29") + for i in "${!harbor_images[@]}"; do + ctr -n=k8s.io images pull "${DOCKER_REGISTRY}/antrea/${harbor_images[i]}" + ctr -n=k8s.io images tag "${DOCKER_REGISTRY}/antrea/${harbor_images[i]}" "${antrea_images[i]}" + done + echo "===== Deliver Antrea to Linux worker nodes and pull necessary images on worker nodes =====" + kubectl get nodes -o wide --no-headers=true | awk -v role="$CONTROL_PLANE_NODE_ROLE" '$3 !~ role && $1 !~ /win/ {print $6}' | while read IP; do + rsync -avr --progress --inplace -e "ssh -o StrictHostKeyChecking=no" antrea-ubuntu.tar jenkins@${IP}:${WORKDIR}/antrea-ubuntu.tar + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "${CLEAN_STALE_IMAGES_CONTAINERD}; ctr -n=k8s.io images import ${WORKDIR}/antrea-ubuntu.tar" || true + + harbor_images=("agnhost:2.13" "nginx:1.15-alpine") + 
antrea_images=("e2eteam/agnhost:2.13" "docker.io/library/nginx:1.15-alpine") + for i in "${!harbor_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "ctr -n=k8s.io images pull ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} && ctr -n=k8s.io images tag ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} ${antrea_images[i]}" || true + done + # Pull necessary images in advance to avoid transient error + for image in "${common_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "ctr -n=k8s.io images pull ${image}" || true + done + done + + echo "===== Build Antrea Windows on Windows Jumper Node =====" + rm -f antrea-windows.tar.gz + # Compress antrea repo and copy it to a Windows node + mkdir -p jenkins + tar --exclude='./jenkins' -czf jenkins/antrea_repo.tar.gz -C "$(pwd)" . + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T jenkins/antrea_repo.tar.gz Administrator@${WIN_JUMPER}: && break + done + ssh -o StrictHostKeyChecking=no -n Administrator@${WIN_JUMPER} "docker pull ${DOCKER_REGISTRY}/antrea/golang:${GO_VERSION}-nanoserver && docker tag ${DOCKER_REGISTRY}/antrea/golang:${GO_VERSION}-nanoserver golang:${GO_VERSION}-nanoserver" + ssh -o StrictHostKeyChecking=no -n Administrator@${WIN_JUMPER} "rm -rf antrea && mkdir antrea && cd antrea && tar -xzf ../antrea_repo.tar.gz > /dev/null && sed -i \"s|build/images/base-windows/Dockerfile|build/images/base-windows/Dockerfile --network host|g\" Makefile && sed -i \"s|build/images/Dockerfile.build.windows|build/images/Dockerfile.build.windows --network host|g\" Makefile && NO_PULL=${NO_PULL} make build-windows && docker save -o antrea-windows.tar antrea/antrea-windows:latest && gzip -f antrea-windows.tar" || true + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T Administrator@${WIN_JUMPER}:antrea/antrea-windows.tar.gz . 
&& break + done + + echo "===== Deliver Antrea Windows to Windows worker nodes and pull necessary images on Windows worker nodes =====" + sed -i 's/if (!(Test-Path $AntreaAgentConfigPath))/if ($true)/' hack/windows/Helper.psm1 + kubectl get nodes -o wide --no-headers=true | awk -v role="$CONTROL_PLANE_NODE_ROLE" '$3 !~ role && $1 ~ /win/ {print $1}' | while read WORKER_NAME; do + echo "==== Reverting Windows VM ${WORKER_NAME} =====" + govc snapshot.revert -vm ${WORKER_NAME} win-initial + # If Windows VM fails to power on correctly in time, retry several times. + winVMIPs="" + for i in `seq 10`; do + winVMIPs=$(govc vm.ip -wait=2m -a ${WORKER_NAME}) + if [[ $winVMIPs != "" ]]; then + echo "Windows VM ${WORKER_NAME} powered on" + break + fi + echo "Windows VM ${WORKER_NAME} failed to power on" + govc vm.power -on ${WORKER_NAME} || true + done + if [[ $winVMIPs == "" ]]; then + echo "Windows VM ${WORKER_NAME} didn't power on after 3 tries, exiting" + exit 1 + fi + IP=$(kubectl get node "${WORKER_NAME}" -o jsonpath='{.status.addresses[0].address}') + # Windows VM is reverted to an old snapshot so computer date needs updating. 
+ for i in `seq 24`; do + sleep 5 + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "W32tm /resync /force" | grep successfully && break + done + # Avoid potential resync delay error + sleep 5 + # Some tests need us.gcr.io/k8s-artifacts-prod/e2e-test-images/agnhost:2.13 image but it is not for windows/amd64 10.0.17763 + # Use e2eteam/agnhost:2.13 instead + harbor_images=("sigwindowstools-kube-proxy:v1.18.0" "agnhost:2.13" "agnhost:2.13" "agnhost:2.29" "e2eteam-jessie-dnsutils:1.0" "e2eteam-pause:3.2") + antrea_images=("sigwindowstools/kube-proxy:v1.18.0" "e2eteam/agnhost:2.13" "us.gcr.io/k8s-artifacts-prod/e2e-test-images/agnhost:2.13" "k8s.gcr.io/e2e-test-images/agnhost:2.29" "e2eteam/jessie-dnsutils:1.0" "e2eteam/pause:3.2") + # Pull necessary images in advance to avoid transient error + for i in "${!harbor_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "ctr -n=k8s.io images pull ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} && ctr -n=k8s.io images tag ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} ${antrea_images[i]}" || true + done + + # Use a script to run antrea agent in windows Network Policy cases + if [ "$TESTCASE" == "windows-networkpolicy-process" ]; then + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell stop-service kubelet" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell stop-service docker" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell rm C:\ProgramData\docker\docker.pid" || true + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell start-service docker" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell start-service kubelet" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell start-service ovsdb-server" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "powershell start-service ovs-vswitchd" + echo "===== Use script to startup antrea agent =====" + ssh -o StrictHostKeyChecking=no -n 
Administrator@${IP} "rm -rf /cygdrive/c/k/antrea && mkdir -p /cygdrive/c/k/antrea/bin && mkdir -p /cygdrive/c/k/antrea/etc && rm -rf /cygdrive/c/opt/cni/bin && mkdir -p /cygdrive/c/opt/cni/bin && mkdir -p /cygdrive/c/etc/cni/net.d" + scp -o StrictHostKeyChecking=no -T $KUBECONFIG Administrator@${IP}:/cygdrive/c/k/config + scp -o StrictHostKeyChecking=no -T bin/antrea-agent.exe Administrator@${IP}:/cygdrive/c/k/antrea/bin/ + scp -o StrictHostKeyChecking=no -T bin/antctl.exe Administrator@${IP}:/cygdrive/c/k/antrea/bin/antctl.exe + scp -o StrictHostKeyChecking=no -T bin/antrea-cni.exe Administrator@${IP}:/cygdrive/c/opt/cni/bin/antrea.exe + scp -o StrictHostKeyChecking=no -T hack/windows/Start-AntreaAgent.ps1 Administrator@${IP}:/cygdrive/c/k/antrea/ + scp -o StrictHostKeyChecking=no -T hack/windows/Stop-AntreaAgent.ps1 Administrator@${IP}:/cygdrive/c/k/antrea/ + scp -o StrictHostKeyChecking=no -T hack/windows/Helper.psm1 Administrator@${IP}:/cygdrive/c/k/antrea/ + scp -o StrictHostKeyChecking=no -T build/yamls/windows/base/conf/antrea-cni.conflist Administrator@${IP}:/cygdrive/c/etc/cni/net.d/10-antrea.conflist + scp -o StrictHostKeyChecking=no -T build/yamls/windows/base/conf/antrea-agent.conf Administrator@${IP}:/cygdrive/c/k/antrea/etc + else + if ! (test -f antrea-windows.tar.gz); then + # Compress antrea repo and copy it to a Windows node + mkdir -p jenkins + tar --exclude='./jenkins' -czf jenkins/antrea_repo.tar.gz -C "$(pwd)" . 
+ for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T jenkins/antrea_repo.tar.gz Administrator@${IP}: && break + done + echo "=== Build Windows on Windows Node===" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "docker pull ${DOCKER_REGISTRY}/antrea/golang:${GO_VERSION}-nanoserver && docker tag ${DOCKER_REGISTRY}/antrea/golang:${GO_VERSION}-nanoserver golang:${GO_VERSION}-nanoserver" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "rm -rf antrea && mkdir antrea && cd antrea && tar -xzf ../antrea_repo.tar.gz > /dev/null && sed -i \"s|build/images/base-windows/Dockerfile|build/images/base-windows/Dockerfile --network host|g\" Makefile && sed -i \"s|build/images/Dockerfile.build.windows|build/images/Dockerfile.build.windows --network host|g\" Makefile && NO_PULL=${NO_PULL} make build-windows && docker save -o antrea-windows.tar ${DOCKER_REGISTRY}/antrea/antrea-windows:latest && gzip -f antrea-windows.tar" || true + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T Administrator@${IP}:antrea/antrea-windows.tar.gz . 
&& break + done + else + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T antrea-windows.tar.gz Administrator@${IP}: && break + done + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "ctr -n k8s.io images import antrea-windows.tar.gz" + fi + fi + done + rm -f antrea-windows.tar.gz +} + function deliver_antrea { echo "====== Cleanup Antrea Installation ======" clean_up_one_ns "monitoring" || true @@ -662,6 +824,41 @@ function run_conformance_windows { fi } +function run_conformance_windows_containerd { + echo "====== Running Antrea Conformance Tests ======" + export GO111MODULE=on + export GOPATH=${WORKDIR}/go + export GOROOT=/usr/local/go + export GOCACHE=${WORKDIR}/.cache/go-build + export PATH=$GOROOT/bin:$PATH + + if [[ "$TESTCASE" == "windows-networkpolicy-process" ]]; then + # Antrea Windows agents are deployed with scripts as processes on host for Windows NetworkPolicy test + wait_for_antrea_windows_processes_ready + else + # Antrea Windows agent Pods are deployed for Windows Conformance test + clean_for_windows_install_cni + wait_for_antrea_windows_pods_ready + fi + + echo "====== Run test with e2e.test ======" + export KUBE_TEST_REPO_LIST=${WORKDIR}/repo_list + if [ "$TESTCASE" == "windows-containerd-networkpolicy" ]; then + ginkgo -timeout=2h --noColor $E2ETEST_PATH -- --provider=skeleton --ginkgo.focus="$WINDOWS_NETWORKPOLICY_FOCUS" --ginkgo.skip="$WINDOWS_NETWORKPOLICY_CONTAINERD_SKIP" > windows_conformance_result_no_color.txt || true + else + ginkgo --noColor $E2ETEST_PATH -- --provider=skeleton --node-os-distro=windows --ginkgo.focus="$WINDOWS_CONFORMANCE_FOCUS" --ginkgo.skip="$WINDOWS_CONFORMANCE_CONTAINERD_SKIP" > windows_conformance_result_no_color.txt || true + fi + + if grep -Fxq "Test Suite Failed" windows_conformance_result_no_color.txt; then + echo "=== Failed cases exist ===" + TEST_FAILURE=true + collect_windows_network_info_and_logs + else + echo "All tests passed." 
+ fi +} + + function run_install_windows_ovs { echo "===== Verify Install-OVS =====" export_govc_env_var @@ -892,11 +1089,21 @@ fi trap clean_antrea EXIT if [[ ${TESTCASE} =~ "windows" ]]; then - deliver_antrea_windows - if [[ ${TESTCASE} =~ "e2e" ]]; then - run_e2e_windows + if [[ ${TESTCASE} =~ "containerd" ]]; then + WINDOWS_YAML="windows-containerd" + deliver_antrea_windows_containerd + if [[ ${TESTCASE} =~ "e2e" ]]; then + run_e2e_windows + else + run_conformance_windows_containerd + fi else - run_conformance_windows + deliver_antrea_windows + if [[ ${TESTCASE} =~ "e2e" ]]; then + run_e2e_windows + else + run_conformance_windows + fi fi elif [[ ${TESTCASE} =~ "e2e" ]]; then deliver_antrea diff --git a/test/e2e/traceflow_test.go b/test/e2e/traceflow_test.go index c2e1fae585a..82cff22ea85 100644 --- a/test/e2e/traceflow_test.go +++ b/test/e2e/traceflow_test.go @@ -103,6 +103,8 @@ func testTraceflowIntraNodeANP(t *testing.T, data *TestData) { node1 := nodeName(nodeIdx) node1Pods, _, node1CleanupFn := createTestAgnhostPods(t, data, 3, data.testNamespace, node1) defer node1CleanupFn() + // Give a little time for Nodes to install OVS flows. + time.Sleep(time.Second * 2) var denyIngress *v1alpha1.NetworkPolicy denyIngressName := "test-anp-deny-ingress" @@ -300,6 +302,8 @@ func testTraceflowIntraNode(t *testing.T, data *TestData) { agentPod, _ := data.getAntreaPodOnNode(node1) node1Pods, node1IPs, node1CleanupFn := createTestAgnhostPods(t, data, 3, data.testNamespace, node1) defer node1CleanupFn() + // Give a little time for Nodes to install OVS flows. 
+ time.Sleep(time.Second * 2) var pod0IPv4Str, pod1IPv4Str, dstPodIPv4Str, dstPodIPv6Str string if node1IPs[0].ipv4 != nil { pod0IPv4Str = node1IPs[0].ipv4.String() @@ -2012,6 +2016,8 @@ func testTraceflowExternalIP(t *testing.T, data *TestData) { nodeIP := nodeIP(nodeIdx) podNames, _, cleanupFn := createTestAgnhostPods(t, data, 1, data.testNamespace, node) defer cleanupFn() + // Give a little time for Nodes to install OVS flows. + time.Sleep(time.Second * 2) testcase := testcase{ name: "nodeIPDestination",