From 3d08fa2d5ffa5d266898ef19ab1ae37dd8514933 Mon Sep 17 00:00:00 2001 From: Shuyang Xin Date: Mon, 17 Oct 2022 23:23:03 +0800 Subject: [PATCH] Enable containerd CI pipeline for antrea windows Add e2e,conformance and networkpolicy tests for windows containerd testbed. ginkgo:v2.1.6 kubernetes:v1.25 containerd:v1.6.6 Signed-off-by: Shuyang Xin --- build/yamls/antrea-windows-containerd.yml | 56 ++-- build/yamls/windows/base/agent-containerd.yml | 88 ++++++ .../conf/Install-WindowsCNI-Containerd.ps1 | 11 + .../base/conf/Run-AntreaAgent-Containerd.ps1 | 4 + ci/jenkins/test-mc.sh | 2 +- ci/jenkins/test.sh | 257 ++++++++++++++---- docs/windows.md | 75 ++++- hack/generate-manifest-windows.sh | 19 +- hack/release/prepare-assets.sh | 1 + test/e2e/traceflow_test.go | 9 + 10 files changed, 430 insertions(+), 92 deletions(-) create mode 100644 build/yamls/windows/base/agent-containerd.yml create mode 100644 build/yamls/windows/base/conf/Install-WindowsCNI-Containerd.ps1 create mode 100644 build/yamls/windows/base/conf/Run-AntreaAgent-Containerd.ps1 diff --git a/build/yamls/antrea-windows-containerd.yml b/build/yamls/antrea-windows-containerd.yml index f602fa30389..4448474109d 100644 --- a/build/yamls/antrea-windows-containerd.yml +++ b/build/yamls/antrea-windows-containerd.yml @@ -1,6 +1,6 @@ apiVersion: v1 data: - Install-WindowsCNI-priv.ps1: | + Install-WindowsCNI-Containerd.ps1: | $ErrorActionPreference = "Stop"; mkdir -force c:/var/log/antrea @@ -11,13 +11,13 @@ data: cp $mountPath/var/run/secrets/kubernetes.io/serviceaccount/token C:/var/run/secrets/kubernetes.io/serviceaccount cp $mountPath/k/antrea/cni/* c:/opt/cni/bin/ cp $mountPath/etc/antrea/antrea-cni.conflist c:/etc/cni/net.d/10-antrea.conflist + mkdir -force c:/k/antrea/bin + cp $mountPath/k/antrea/bin/antctl.exe c:/k/antrea/bin/antctl.exe - Run-AntreaAgent.ps1: | + Run-AntreaAgent-Containerd.ps1: | $ErrorActionPreference = "Stop" $mountPath = $env:CONTAINER_SANDBOX_MOUNT_POINT $mountPath = 
($mountPath.Replace('\', '/')).TrimEnd('/') - mkdir -force c:/k/antrea/bin - cp $mountPath/k/antrea/bin/* c:/k/antrea/bin/ & "$mountPath/k/antrea/bin/antrea-agent.exe" --config=$mountPath/etc/antrea/antrea-agent.conf --logtostderr=false --log_dir=c:/var/log/antrea --alsologtostderr --log_file_max_size=100 --log_file_max_num=4 --v=4 kind: ConfigMap metadata: @@ -34,7 +34,7 @@ data: # Enable antrea proxy which provides ServiceLB for in-cluster services in antrea agent. # It should be enabled on Windows, otherwise NetworkPolicy will not take effect on # Service traffic. - AntreaProxy: true + # AntreaProxy: true # Enable EndpointSlice support in AntreaProxy. Don't enable this feature unless that EndpointSlice # API version v1beta1 is supported and set as enabled in Kubernetes. If AntreaProxy is not enabled, @@ -61,6 +61,11 @@ data: # - stt #tunnelType: geneve + # TunnelPort is the destination port for UDP and TCP based tunnel protocols + # (Geneve, VXLAN, and STT). If zero, it will use the assigned IANA port for the + # protocol, i.e. 6081 for Geneve, 4789 for VXLAN, and 7471 for STT. + #tunnelPort: 0 + # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead. @@ -69,7 +74,7 @@ data: # ClusterIP CIDR range for Services. It's required when AntreaProxy is not enabled, and should be # set to the same value as the one specified by --service-cluster-ip-range for kube-apiserver. When # AntreaProxy is enabled, this parameter is not needed and will be ignored if provided. - serviceCIDR: 10.96.0.0/12 + #serviceCIDR: 10.96.0.0/12 # The port for the antrea-agent APIServer to serve on. #apiPort: 10350 @@ -138,7 +143,7 @@ data: # Provide the address of Kubernetes apiserver, to override any value provided in kubeconfig or InClusterConfig. # Defaults to "". 
It must be a host string, a host:port pair, or a URL to the base of the apiserver. - #kubeAPIServerOverride: "10.176.210.86:6443" + #kubeAPIServerOverride: "" # Option antreaProxy contains AntreaProxy related configuration options. antreaProxy: @@ -147,6 +152,7 @@ data: # Note that this option is experimental. If kube-proxy is removed, option kubeAPIServerOverride must be used to access # apiserver directly. #proxyAll: false + nodePortLocal: # Enable NodePortLocal, a feature used to make Pods reachable using port forwarding on the host. To # enable this feature, you need to set "enable" to true, and ensure that the NodePortLocal feature @@ -206,7 +212,7 @@ spec: containers: - args: - -file - - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/antrea-windows/Run-AntreaAgent.ps1 + - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/antrea-windows/Run-AntreaAgent-Containerd.ps1 command: - powershell env: @@ -222,7 +228,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: projects.registry.vmware.com/antrea/antrea-windows:latest + image: antrea/antrea-windows:latest imagePullPolicy: IfNotPresent name: antrea-agent volumeMounts: @@ -230,32 +236,22 @@ spec: name: antrea-windows-config - mountPath: /var/lib/antrea-windows name: antrea-agent-windows - - mountPath: /host/k/antrea/ - name: host-antrea-home - mountPath: /var/log/antrea/ name: var-log-antrea hostNetwork: true initContainers: - args: - -File - - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/antrea-windows/Install-WindowsCNI-priv.ps1 + - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/antrea-windows/Install-WindowsCNI-Containerd.ps1 command: - powershell - image: projects.registry.vmware.com/antrea/antrea-windows:latest + image: antrea/antrea-windows:latest imagePullPolicy: IfNotPresent name: install-cni volumeMounts: - mountPath: /etc/antrea name: antrea-windows-config readOnly: true - - mountPath: /host/etc/cni/net.d - name: host-cni-conf - - mountPath: /host/opt/cni/bin - name: host-cni-bin - - mountPath: 
/host/k/antrea/ - name: host-antrea-home - - mountPath: /host/var/run/secrets/ - name: host-secrets-path - mountPath: /var/lib/antrea-windows name: antrea-agent-windows nodeSelector: @@ -275,25 +271,9 @@ spec: defaultMode: 420 name: antrea-agent-windows-kht6m7hthm name: antrea-agent-windows - - hostPath: - path: /etc/cni/net.d - type: DirectoryOrCreate - name: host-cni-conf - - hostPath: - path: /opt/cni/bin - type: DirectoryOrCreate - name: host-cni-bin - - hostPath: - path: /k/antrea - type: DirectoryOrCreate - name: host-antrea-home - hostPath: path: /var/log/antrea/ type: DirectoryOrCreate name: var-log-antrea - - hostPath: - path: /var/run/secrets/ - type: DirectoryOrCreate - name: host-secrets-path updateStrategy: - type: RollingUpdate \ No newline at end of file + type: RollingUpdate diff --git a/build/yamls/windows/base/agent-containerd.yml b/build/yamls/windows/base/agent-containerd.yml new file mode 100644 index 00000000000..ddaf93b30d4 --- /dev/null +++ b/build/yamls/windows/base/agent-containerd.yml @@ -0,0 +1,88 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + component: antrea-agent + name: antrea-agent-windows + namespace: kube-system +spec: + selector: + matchLabels: + component: antrea-agent + template: + metadata: + annotations: + "microsoft.com/hostprocess-inherit-user": "true" + labels: + component: antrea-agent + spec: + securityContext: + windowsOptions: + runAsUserName: "NT AUTHORITY\\SYSTEM" + hostProcess: true + containers: + - args: + - -file + - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/antrea-windows/Run-AntreaAgent-Containerd.ps1 + command: + - powershell + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + image: antrea-windows + name: antrea-agent + volumeMounts: + - mountPath: /etc/antrea + name: antrea-windows-config + - mountPath: 
/var/lib/antrea-windows + name: antrea-agent-windows + - mountPath: /var/log/antrea/ + name: var-log-antrea + hostNetwork: true + initContainers: + - args: + - -File + - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/antrea-windows/Install-WindowsCNI-Containerd.ps1 + command: + - powershell + image: antrea-windows + name: install-cni + volumeMounts: + - mountPath: /etc/antrea + name: antrea-windows-config + readOnly: true + - mountPath: /var/lib/antrea-windows + name: antrea-agent-windows + nodeSelector: + kubernetes.io/os: windows + priorityClassName: system-node-critical + serviceAccountName: antrea-agent + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - effect: NoSchedule + operator: Exists + volumes: + - configMap: + name: antrea-windows-config + name: antrea-windows-config + - configMap: + defaultMode: 420 + name: antrea-agent-windows + name: antrea-agent-windows + - hostPath: + path: /var/log/antrea/ + type: DirectoryOrCreate + name: var-log-antrea + updateStrategy: + type: RollingUpdate diff --git a/build/yamls/windows/base/conf/Install-WindowsCNI-Containerd.ps1 b/build/yamls/windows/base/conf/Install-WindowsCNI-Containerd.ps1 new file mode 100644 index 00000000000..de366fa4570 --- /dev/null +++ b/build/yamls/windows/base/conf/Install-WindowsCNI-Containerd.ps1 @@ -0,0 +1,11 @@ +$ErrorActionPreference = "Stop"; +mkdir -force c:/var/log/antrea +$mountPath = $env:CONTAINER_SANDBOX_MOUNT_POINT +$mountPath = ($mountPath.Replace('\', '/')).TrimEnd('/') +mkdir -force C:/var/run/secrets/kubernetes.io/serviceaccount +cp $mountPath/var/run/secrets/kubernetes.io/serviceaccount/ca.crt C:/var/run/secrets/kubernetes.io/serviceaccount +cp $mountPath/var/run/secrets/kubernetes.io/serviceaccount/token C:/var/run/secrets/kubernetes.io/serviceaccount +cp $mountPath/k/antrea/cni/* c:/opt/cni/bin/ +cp $mountPath/etc/antrea/antrea-cni.conflist c:/etc/cni/net.d/10-antrea.conflist +mkdir -force c:/k/antrea/bin +cp $mountPath/k/antrea/bin/antctl.exe 
c:/k/antrea/bin/antctl.exe diff --git a/build/yamls/windows/base/conf/Run-AntreaAgent-Containerd.ps1 b/build/yamls/windows/base/conf/Run-AntreaAgent-Containerd.ps1 new file mode 100644 index 00000000000..8f6ada89f67 --- /dev/null +++ b/build/yamls/windows/base/conf/Run-AntreaAgent-Containerd.ps1 @@ -0,0 +1,4 @@ +$ErrorActionPreference = "Stop" +$mountPath = $env:CONTAINER_SANDBOX_MOUNT_POINT +$mountPath = ($mountPath.Replace('\', '/')).TrimEnd('/') +& "$mountPath/k/antrea/bin/antrea-agent.exe" --config=$mountPath/etc/antrea/antrea-agent.conf --logtostderr=false --log_dir=c:/var/log/antrea --alsologtostderr --log_file_max_size=100 --log_file_max_num=4 --v=4 diff --git a/ci/jenkins/test-mc.sh b/ci/jenkins/test-mc.sh index a8bd6ffd810..18624a50acb 100755 --- a/ci/jenkins/test-mc.sh +++ b/ci/jenkins/test-mc.sh @@ -264,7 +264,7 @@ function deliver_antrea_multicluster { chmod -R g-w build/images/base DOCKER_REGISTRY="${DOCKER_REGISTRY}" ./hack/build-antrea-linux-all.sh --pull - echo "====== Delivering Antrea to all the Nodes ======" + echo "====== Delivering Antrea to all Nodes ======" docker save -o ${WORKDIR}/antrea-ubuntu.tar antrea/antrea-ubuntu:latest diff --git a/ci/jenkins/test.sh b/ci/jenkins/test.sh index 325651048b5..8c96760e58e 100755 --- a/ci/jenkins/test.sh +++ b/ci/jenkins/test.sh @@ -35,11 +35,14 @@ PROXY_ALL=false DEFAULT_IP_MODE="ipv4" IP_MODE="" K8S_VERSION="1.23.6-00" +WINDOWS_YAML_SUFFIX="windows" +WIN_JUMPER="" WINDOWS_CONFORMANCE_FOCUS="\[sig-network\].+\[Conformance\]|\[sig-windows\]" WINDOWS_CONFORMANCE_SKIP="\[LinuxOnly\]|\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]|\[sig-cli\]|\[sig-storage\]|\[sig-auth\]|\[sig-api-machinery\]|\[sig-apps\]|\[sig-node\]|\[Privileged\]|should be able to change the type from|\[sig-network\] Services should be able to create a functioning NodePort service \[Conformance\]|Service endpoints latency should not be very high|should be able to create a functioning NodePort service for Windows" 
WINDOWS_NETWORKPOLICY_FOCUS="\[Feature:NetworkPolicy\]" WINDOWS_NETWORKPOLICY_SKIP="SCTP" +WINDOWS_NETWORKPOLICY_CONTAINERD_SKIP="\[sig-storage\]|SCTP" CONFORMANCE_SKIP="\[Slow\]|\[Serial\]|\[Disruptive\]|\[Flaky\]|\[Feature:.+\]|\[sig-cli\]|\[sig-storage\]|\[sig-auth\]|\[sig-api-machinery\]|\[sig-apps\]|\[sig-node\]" NETWORKPOLICY_SKIP="should allow egress access to server in CIDR block|should enforce except clause while egress access to server in CIDR block" @@ -49,7 +52,7 @@ CLEAN_STALE_IMAGES="docker system prune --force --all --filter until=48h" CLEAN_STALE_IMAGES_CONTAINERD="crictl rmi --prune" _usage="Usage: $0 [--kubeconfig ] [--workdir ] - [--testcase ] + [--testcase ] Run K8s e2e community tests (Conformance & Network Policy) or Antrea e2e tests on a remote (Jenkins) Windows or Linux cluster. @@ -59,7 +62,8 @@ Run K8s e2e community tests (Conformance & Network Policy) or Antrea e2e tests o --registry The docker registry to use instead of dockerhub. --proxyall Enable proxyAll to test AntreaProxy. --testbed-type The testbed type to run tests. It can be flexible-ipam, jumper or legacy. - --ip-mode IP mode for flexible-ipam e2e test. Default is $DEFAULT_IP_MODE. It can also be ipv6 or ds." + --ip-mode IP mode for flexible-ipam e2e test. Default is $DEFAULT_IP_MODE. It can also be ipv6 or ds. + --win-jumper Name of the windows jumper node in containerd cluster. Images are built by docker on this node." 
function print_usage { echoerr "$_usage" @@ -102,6 +106,10 @@ case $key in IP_MODE="$2" shift 2 ;; + --win-jumper) + WIN_JUMPER="$2" + shift 2 + ;; -h|--help) print_usage exit 0 @@ -229,9 +237,9 @@ function collect_windows_network_info_and_logs { function wait_for_antrea_windows_pods_ready { kubectl apply -f "${WORKDIR}/antrea.yml" if [[ "${PROXY_ALL}" == false ]]; then - kubectl apply -f "${WORKDIR}/kube-proxy-windows.yml" + kubectl apply -f "${WORKDIR}/kube-proxy-${WINDOWS_YAML_SUFFIX}.yml" fi - kubectl apply -f "${WORKDIR}/antrea-windows.yml" + kubectl apply -f "${WORKDIR}/antrea-${WINDOWS_YAML_SUFFIX}.yml" kubectl rollout restart deployment/coredns -n kube-system kubectl rollout status deployment/coredns -n kube-system kubectl rollout status deployment.apps/antrea-controller -n kube-system @@ -275,14 +283,7 @@ function clean_up_one_ns { kubectl delete ns "${ns}" --ignore-not-found=true || true } -function deliver_antrea_windows { - echo "====== Cleanup Antrea Installation ======" - clean_up_one_ns "antrea-test" - kubectl delete -f ${WORKDIR}/antrea-windows.yml --ignore-not-found=true || true - kubectl delete -f ${WORKDIR}/kube-proxy-windows.yml --ignore-not-found=true || true - kubectl delete daemonset antrea-agent -n kube-system --ignore-not-found=true || true - kubectl delete -f ${WORKDIR}/antrea.yml --ignore-not-found=true || true - +function prepare_env { echo "====== Building Antrea for the Following Commit ======" export GO111MODULE=on export GOPATH=${WORKDIR}/go @@ -292,6 +293,46 @@ function deliver_antrea_windows { git show --numstat make clean +} + +function revert_snapshot_windows { + WIN_NAME=$1 + echo "==== Reverting Windows VM ${WIN_NAME} =====" + govc snapshot.revert -vm ${WIN_NAME} win-initial + # If Windows VM fails to power on correctly in time, retry several times. 
+ winVMIPs="" + for i in `seq 10`; do + winVMIPs=$(govc vm.ip -wait=2m -a ${WIN_NAME}) + if [[ $winVMIPs != "" ]]; then + echo "Windows VM ${WIN_NAME} powered on" + break + fi + echo "Windows VM ${WIN_NAME} failed to power on" + govc vm.power -on ${WIN_NAME} || true + done + if [[ $winVMIPs == "" ]]; then + echo "Windows VM ${WIN_NAME} didn't power on after 10 tries, exiting" + exit 1 + fi + IP=${winVMIPs#*,} + # Windows VM is reverted to an old snapshot so computer date needs updating. + for i in `seq 24`; do + sleep 5 + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "W32tm /resync /force" | grep successfully && break + done + # Avoid potential resync delay error + sleep 5 +} + +function deliver_antrea_windows { + echo "====== Cleanup Antrea Installation ======" + clean_up_one_ns "antrea-test" + kubectl delete -f ${WORKDIR}/antrea-windows.yml --ignore-not-found=true || true + kubectl delete -f ${WORKDIR}/kube-proxy-windows.yml --ignore-not-found=true || true + kubectl delete daemonset antrea-agent -n kube-system --ignore-not-found=true || true + kubectl delete -f ${WORKDIR}/antrea.yml --ignore-not-found=true || true + + prepare_env ${CLEAN_STALE_IMAGES} chmod -R g-w build/images/ovs chmod -R g-w build/images/base @@ -300,7 +341,7 @@ function deliver_antrea_windows { make windows-bin fi - echo "====== Delivering Antrea to all the Nodes ======" + echo "====== Delivering Antrea to all Nodes ======" export_govc_env_var # Enable verbose log for troubleshooting. 
@@ -331,8 +372,6 @@ function deliver_antrea_windows { rsync -avr --progress --inplace -e "ssh -o StrictHostKeyChecking=no" antrea-ubuntu.tar jenkins@${IP}:${WORKDIR}/antrea-ubuntu.tar ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "${CLEAN_STALE_IMAGES}; docker load -i ${WORKDIR}/antrea-ubuntu.tar" || true - harbor_images=("agnhost:2.13" "nginx:1.15-alpine") - antrea_images=("e2eteam/agnhost:2.13" "docker.io/library/nginx:1.15-alpine") for i in "${!harbor_images[@]}"; do ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "docker pull -q ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} && docker tag ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} ${antrea_images[i]}" || true done @@ -346,31 +385,7 @@ function deliver_antrea_windows { rm -f antrea-windows.tar.gz sed -i 's/if (!(Test-Path $AntreaAgentConfigPath))/if ($true)/' hack/windows/Helper.psm1 kubectl get nodes -o wide --no-headers=true | awk -v role="$CONTROL_PLANE_NODE_ROLE" '$3 !~ role && $1 ~ /win/ {print $1}' | while read WORKER_NAME; do - echo "==== Reverting Windows VM ${WORKER_NAME} =====" - govc snapshot.revert -vm ${WORKER_NAME} win-initial - # If Windows VM fails to power on correctly in time, retry several times. - winVMIPs="" - for i in `seq 10`; do - winVMIPs=$(govc vm.ip -wait=2m -a ${WORKER_NAME}) - if [[ $winVMIPs != "" ]]; then - echo "Windows VM ${WORKER_NAME} powered on" - break - fi - echo "Windows VM ${WORKER_NAME} failed to power on" - govc vm.power -on ${WORKER_NAME} || true - done - if [[ $winVMIPs == "" ]]; then - echo "Windows VM ${WORKER_NAME} didn't power on after 3 tries, exiting" - exit 1 - fi - IP=$(kubectl get node "${WORKER_NAME}" -o jsonpath='{.status.addresses[0].address}') - # Windows VM is reverted to an old snapshot so computer date needs updating. 
- for i in `seq 24`; do - sleep 5 - ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "W32tm /resync /force" | grep successfully && break - done - # Avoid potential resync delay error - sleep 5 + revert_snapshot_windows ${WORKER_NAME} # Some tests need us.gcr.io/k8s-artifacts-prod/e2e-test-images/agnhost:2.13 image but it is not for windows/amd64 10.0.17763 # Use e2eteam/agnhost:2.13 instead harbor_images=("sigwindowstools-kube-proxy:v1.18.0" "agnhost:2.13" "agnhost:2.13" "agnhost:2.29" "e2eteam-jessie-dnsutils:1.0" "e2eteam-pause:3.2") @@ -429,6 +444,108 @@ function deliver_antrea_windows { rm -f antrea-windows.tar.gz } +function deliver_antrea_windows_containerd { + echo "====== Cleanup Antrea Installation ======" + clean_up_one_ns "antrea-test" + kubectl delete -f ${WORKDIR}/antrea-windows-containerd.yml --ignore-not-found=true || true + kubectl delete -f ${WORKDIR}/kube-proxy-windows-containerd.yml --ignore-not-found=true || true + kubectl delete daemonset antrea-agent -n kube-system --ignore-not-found=true || true + kubectl delete -f ${WORKDIR}/antrea.yml --ignore-not-found=true || true + + prepare_env + ${CLEAN_STALE_IMAGES_CONTAINERD} + chmod -R g-w build/images/ovs + chmod -R g-w build/images/base + DOCKER_REGISTRY="${DOCKER_REGISTRY}" ./hack/build-antrea-linux-all.sh --pull + + echo "====== Delivering Antrea to all Nodes ======" + export_govc_env_var + + # Enable verbose log for troubleshooting. + sed -i "s/--v=0/--v=4/g" build/yamls/antrea.yml build/yamls/antrea-windows-containerd.yml + + cp -f build/yamls/*.yml $WORKDIR + docker save -o antrea-ubuntu.tar antrea/antrea-ubuntu:latest + + # Clean docker image to save disk space. 
+ ${CLEAN_STALE_IMAGES} + + echo "===== Pull necessary images on Control-Plane node =====" + harbor_images=("agnhost:2.13" "nginx:1.15-alpine") + antrea_images=("e2eteam/agnhost:2.13" "docker.io/library/nginx:1.15-alpine") + common_images=("k8s.gcr.io/e2e-test-images/agnhost:2.29") + k8s_images=("k8s.gcr.io/e2e-test-images/agnhost:2.40" "k8s.gcr.io/e2e-test-images/jessie-dnsutils:1.5" "k8s.gcr.io/e2e-test-images/nginx:1.14-2") + e2e_images=("k8sprow.azurecr.io/kubernetes-e2e-test-images/agnhost:2.40" "k8sprow.azurecr.io/kubernetes-e2e-test-images/jessie-dnsutils:1.5" "k8sprow.azurecr.io/kubernetes-e2e-test-images/nginx:1.14-2") + + for i in "${!harbor_images[@]}"; do + ctr -n=k8s.io images delete "${antrea_images[i]}" + ctr -n=k8s.io images pull "${DOCKER_REGISTRY}/antrea/${harbor_images[i]}" + ctr -n=k8s.io images tag "${DOCKER_REGISTRY}/antrea/${harbor_images[i]}" "${antrea_images[i]}" + done + echo "===== Deliver Antrea to Linux worker nodes and pull necessary images on worker nodes =====" + kubectl get nodes -o wide --no-headers=true | awk -v role="$CONTROL_PLANE_NODE_ROLE" '$3 !~ role && $1 !~ /win/ {print $6}' | while read IP; do + rsync -avr --progress --inplace -e "ssh -o StrictHostKeyChecking=no" antrea-ubuntu.tar jenkins@${IP}:${WORKDIR}/antrea-ubuntu.tar + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "${CLEAN_STALE_IMAGES_CONTAINERD}; ctr -n=k8s.io images import ${WORKDIR}/antrea-ubuntu.tar" || true + + for i in "${!harbor_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "ctr -n=k8s.io images pull ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} && ctr -n=k8s.io images tag ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} ${antrea_images[i]}" || true + done + # Pull necessary images in advance to avoid transient error + for image in "${common_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "ctr -n=k8s.io images pull ${image}" || true + done + # Pull necessary images for Kubernetes conformance test + for i in 
"${!k8s_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n jenkins@${IP} "ctr -n=k8s.io images pull ${k8s_images[i]} && ctr -n=k8s.io images tag ${k8s_images[i]} ${e2e_images[i]}" || true + done + done + + echo "===== Build Antrea Windows on Windows Jumper Node =====" + echo "==== Reverting Windows VM ${WIN_JUMPER} =====" + revert_snapshot_windows ${WIN_JUMPER} + rm -f antrea-windows.tar.gz + # Compress antrea repo and copy it to a Windows node + mkdir -p jenkins + tar --exclude='./jenkins' -czf jenkins/antrea_repo.tar.gz -C "$(pwd)" . + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T jenkins/antrea_repo.tar.gz Administrator@${IP}: && break + done + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "docker pull ${DOCKER_REGISTRY}/antrea/golang:${GO_VERSION}-nanoserver && docker tag ${DOCKER_REGISTRY}/antrea/golang:${GO_VERSION}-nanoserver golang:${GO_VERSION}-nanoserver" + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "rm -rf antrea && mkdir antrea && cd antrea && tar -xzf ../antrea_repo.tar.gz > /dev/null && NO_PULL=${NO_PULL}; DOCKER_NETWORK=host make build-windows && docker save -o antrea-windows.tar antrea/antrea-windows:latest && gzip -f antrea-windows.tar" || true + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T Administrator@${IP}:antrea/antrea-windows.tar.gz . 
&& break + done + + echo "===== Deliver Antrea Windows to Windows worker nodes and pull necessary images on Windows worker nodes =====" + sed -i 's/if (!(Test-Path $AntreaAgentConfigPath))/if ($true)/' hack/windows/Helper.psm1 + kubectl get nodes -o wide --no-headers=true | awk -v role="$CONTROL_PLANE_NODE_ROLE" '$3 !~ role && $1 ~ /win/ {print $1}' | while read WORKER_NAME; do + revert_snapshot_windows ${WORKER_NAME} + # Some tests need us.gcr.io/k8s-artifacts-prod/e2e-test-images/agnhost:2.13 image but it is not for windows/amd64 10.0.17763 + # Use e2eteam/agnhost:2.13 instead + harbor_images=("sigwindowstools-kube-proxy:v1.18.0" "agnhost:2.13" "agnhost:2.13" "agnhost:2.29" "e2eteam-jessie-dnsutils:1.0" "e2eteam-pause:3.2") + antrea_images=("sigwindowstools/kube-proxy:v1.18.0" "e2eteam/agnhost:2.13" "us.gcr.io/k8s-artifacts-prod/e2e-test-images/agnhost:2.13" "k8s.gcr.io/e2e-test-images/agnhost:2.29" "e2eteam/jessie-dnsutils:1.0" "e2eteam/pause:3.2") + k8s_images=("k8s.gcr.io/e2e-test-images/agnhost:2.40" "k8s.gcr.io/e2e-test-images/jessie-dnsutils:1.5" "k8s.gcr.io/e2e-test-images/nginx:1.14-2") + e2e_images=("k8sprow.azurecr.io/kubernetes-e2e-test-images/agnhost:2.40" "k8sprow.azurecr.io/kubernetes-e2e-test-images/jessie-dnsutils:1.5" "k8sprow.azurecr.io/kubernetes-e2e-test-images/nginx:1.14-2") + # Pull necessary images in advance to avoid transient error + for i in "${!harbor_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "ctr -n k8s.io images pull ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} && ctr -n k8s.io images tag ${DOCKER_REGISTRY}/antrea/${harbor_images[i]} ${antrea_images[i]}" || true + done + for i in "${!k8s_images[@]}"; do + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "ctr -n k8s.io images pull ${k8s_images[i]} && ctr -n k8s.io images tag ${k8s_images[i]} ${e2e_images[i]}" || true + done + if ! 
(test -f antrea-windows.tar.gz); then + echo "Windows VM ${WIN_JUMPER} didn't build antrea-windows.tar.gz, exiting" + exit 1 + else + for i in `seq 2`; do + timeout 2m scp -o StrictHostKeyChecking=no -T antrea-windows.tar.gz Administrator@${IP}: && break + done + ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "gzip -d antrea-windows.tar.gz && ctr -n k8s.io images import antrea-windows.tar" + fi + done + rm -f antrea-windows.tar +} + function deliver_antrea { echo "====== Cleanup Antrea Installation ======" clean_up_one_ns "monitoring" || true @@ -494,7 +611,7 @@ function deliver_antrea { fi cp -f build/yamls/*.yml $WORKDIR - echo "====== Delivering Antrea to all the Nodes ======" + echo "====== Delivering Antrea to all Nodes ======" docker save -o antrea-ubuntu.tar antrea/antrea-ubuntu:latest docker save -o flow-aggregator.tar antrea/flow-aggregator:latest @@ -623,6 +740,8 @@ function run_e2e_windows { set +e mkdir -p `pwd`/antrea-test-logs + + echo "====== Run test with e2e test ======" go test -v antrea.io/antrea/test/e2e --logs-export-dir `pwd`/antrea-test-logs --provider remote -timeout=50m --prometheus if [[ "$?" != "0" ]]; then TEST_FAILURE=true @@ -649,9 +768,10 @@ function run_conformance_windows { wait_for_antrea_windows_pods_ready fi - echo "====== Run test with e2e.test ======" + echo "====== Run test with conformance test ======" export KUBE_TEST_REPO_LIST=${WORKDIR}/repo_list if [ "$TESTCASE" == "windows-networkpolicy" ]; then + # Allow LinuxOnly mark in windows-networkpolicy because Antrea Windows supports NP functions. 
ginkgo --noColor $E2ETEST_PATH -- --provider=skeleton --ginkgo.focus="$WINDOWS_NETWORKPOLICY_FOCUS" --ginkgo.skip="$WINDOWS_NETWORKPOLICY_SKIP" > windows_conformance_result_no_color.txt || true else ginkgo --noColor $E2ETEST_PATH -- --provider=skeleton --node-os-distro=windows --ginkgo.focus="$WINDOWS_CONFORMANCE_FOCUS" --ginkgo.skip="$WINDOWS_CONFORMANCE_SKIP" > windows_conformance_result_no_color.txt || true @@ -666,6 +786,41 @@ function run_conformance_windows { fi } +function run_conformance_windows_containerd { + echo "====== Running Antrea Conformance Tests ======" + export GO111MODULE=on + export GOPATH=${WORKDIR}/go + export GOROOT=/usr/local/go + export GOCACHE=${WORKDIR}/.cache/go-build + export PATH=$GOROOT/bin:$PATH + + if [[ "$TESTCASE" == "windows-networkpolicy-process" ]]; then + # Antrea Windows agents are deployed with scripts as processes on host for Windows NetworkPolicy test + wait_for_antrea_windows_processes_ready + else + # Antrea Windows agent Pods are deployed for Windows Conformance test + clean_for_windows_install_cni + wait_for_antrea_windows_pods_ready + fi + + echo "====== Run test with conformance test ======" + export KUBE_TEST_REPO_LIST=${WORKDIR}/repo_list + if [ "$TESTCASE" == "windows-containerd-networkpolicy" ]; then + # Allow LinuxOnly mark in windows-containerd-networkpolicy because Antrea Windows supports NP functions. 
+ ginkgo -timeout=2h --no-color $E2ETEST_PATH -- --provider=skeleton --ginkgo.focus="$WINDOWS_NETWORKPOLICY_FOCUS" --ginkgo.skip="$WINDOWS_NETWORKPOLICY_CONTAINERD_SKIP" > windows_conformance_result_no_color.txt || true + else + ginkgo --no-color $E2ETEST_PATH -- --provider=skeleton --node-os-distro=windows --ginkgo.focus="$WINDOWS_CONFORMANCE_FOCUS" --ginkgo.skip="$WINDOWS_CONFORMANCE_SKIP" > windows_conformance_result_no_color.txt || true + fi + + if grep -Fxq "Test Suite Failed" windows_conformance_result_no_color.txt; then + echo "=== Failed cases exist ===" + TEST_FAILURE=true + collect_windows_network_info_and_logs + else + echo "All tests passed." + fi +} + function run_install_windows_ovs { echo "===== Verify Install-OVS =====" export_govc_env_var @@ -896,11 +1051,21 @@ fi trap clean_antrea EXIT if [[ ${TESTCASE} =~ "windows" ]]; then - deliver_antrea_windows - if [[ ${TESTCASE} =~ "e2e" ]]; then - run_e2e_windows + if [[ ${TESTCASE} =~ "containerd" ]]; then + WINDOWS_YAML_SUFFIX="windows-containerd" + deliver_antrea_windows_containerd + if [[ ${TESTCASE} =~ "e2e" ]]; then + run_e2e_windows + else + run_conformance_windows_containerd + fi else - run_conformance_windows + deliver_antrea_windows + if [[ ${TESTCASE} =~ "e2e" ]]; then + run_e2e_windows + else + run_conformance_windows + fi fi elif [[ ${TESTCASE} =~ "e2e" ]]; then deliver_antrea diff --git a/docs/windows.md b/docs/windows.md index 7667be5def8..2bfd7f92eea 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -109,11 +109,11 @@ Start-Service kube-proxy Start-Service antrea-agent ``` -### Installation via wins (Docker based runtimes) +### Installation as a Pod (Docker/Containerd based runtimes) -Installing Antrea using [wins](https://github.com/rancher/wins) gives you a lot of flexibility to manage it as a Pod, but -currently this only works with Docker due to a bug in the way Containerd handles host networking for Windows Pods ([Issue](https://github.com/containerd/containerd/issues/4856)). 
-In any case, if you are using Docker on Windows, this is how you can run Antrea in a Pod. +Installing Antrea using [wins](https://github.com/rancher/wins) gives you a lot +of flexibility to manage it as a Pod. If you are using Docker on Windows, this is +how you can run Antrea in a Pod. #### Download & Configure Antrea for Linux @@ -138,7 +138,7 @@ curl -L "https://github.com/kubernetes-sigs/sig-windows-tools/releases/download/ ``` Replace the content of `run-script.ps1` in configmap named `kube-proxy-windows` -as following: +with the following: ```yaml apiVersion: v1 @@ -162,6 +162,27 @@ metadata: namespace: kube-system ``` +For the Containerd runtime, replace the content of `run-script.ps1` with the following: + +```yaml +apiVersion: v1 +data: + run-script.ps1: |- + $mountPath = $env:CONTAINER_SANDBOX_MOUNT_POINT + $mountPath = ($mountPath.Replace('\', '/')).TrimEnd('/') + New-Item -Path "c:/var/lib" -Name "kube-proxy" -ItemType "directory" -Force + ((Get-Content -path $mountPath/var/lib/kube-proxy/kubeconfig.conf -Raw) -replace '/var',"$($mountPath)/var") | Set-Content -Path /var/lib/kube-proxy/kubeconfig.conf + ((Get-Content -path /var/lib/kube-proxy/kubeconfig.conf -Raw) -replace '\/',"/") | Set-Content -Path /var/lib/kube-proxy/kubeconfig.conf + sed -i 's/mode: iptables/mode: \"\"/g' $mountPath/var/lib/kube-proxy/config.conf + & "$mountPath/k/kube-proxy/kube-proxy.exe" --config=$mountPath/var/lib/kube-proxy/config.conf --v=10 --proxy-mode=userspace --hostname-override=$env:NODE_NAME +kind: ConfigMap +metadata: + labels: + app: kube-proxy + name: kube-proxy-windows + namespace: kube-system +``` + Set the `hostNetwork` option as true in spec of kube-proxy-windows daemonset. ```yaml @@ -184,6 +205,39 @@ spec: hostNetwork: true ``` +For the Containerd runtime, set `hostNetwork` to true and add the following options in the spec and command.
+ +```yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + k8s-app: kube-proxy + name: kube-proxy-windows + namespace: kube-system +spec: + selector: + matchLabels: + k8s-app: kube-proxy-windows + template: + metadata: + labels: + k8s-app: kube-proxy-windows + spec: + securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\SYSTEM" + hostNetwork: true + serviceAccountName: kube-proxy + containers: + - command: + - pwsh + args: + - -file + - $env:CONTAINER_SANDBOX_MOUNT_POINT/var/lib/kube-proxy-windows/run-script.ps1 +``` + Then apply the `kube-proxy.yml`. ```bash @@ -192,7 +246,7 @@ kubectl apply -f kube-proxy.yml #### Add Windows antrea-agent DaemonSet -Now you can deploy antrea-agent Windows DaemonSet by applying file `antrea-windows.yml`. +Now you can deploy the antrea-agent Windows DaemonSet with the Docker runtime by applying the file `antrea-windows.yml`. Download and apply `antrea-windows.yml`. @@ -201,6 +255,15 @@ Download and apply `antrea-windows.yml`. kubectl apply -f https://github.com/antrea-io/antrea/releases/download/<TAG>/antrea-windows.yml ``` +Since Antrea 1.10, you can also deploy the antrea-agent Windows DaemonSet with the Containerd runtime by +applying the file `antrea-windows-containerd.yml`. + +Download and apply `antrea-windows-containerd.yml`. + +```bash +kubectl apply -f https://github.com/antrea-io/antrea/releases/download/<TAG>/antrea-windows-containerd.yml +``` + #### Join Windows worker Nodes #### 1. (Optional) Install OVS (provided by Antrea or your own) diff --git a/hack/generate-manifest-windows.sh b/hack/generate-manifest-windows.sh index 19858085f7a..54594276a5b 100755 --- a/hack/generate-manifest-windows.sh +++ b/hack/generate-manifest-windows.sh @@ -41,6 +41,7 @@ function print_help { echoerr "Try '$0 --help' for more information."
} +RUNTIME="" MODE="dev" KEEP=false @@ -57,6 +58,10 @@ case $key in KEEP=true shift ;; + --containerd) + RUNTIME="containerd" + shift + ;; -h|--help) print_usage exit 0 @@ -108,10 +113,17 @@ BASE=../../base mkdir $MODE && cd $MODE touch kustomization.yml -$KUSTOMIZE edit add base $BASE # ../../patches/$MODE may be empty so we use find and not simply cp find ../../patches/$MODE -name \*.yml -exec cp {} . \; +if [ "$RUNTIME" == "containerd" ]; then + sed -i.bak "s/agent.yml/agent-containerd.yml/g" $BASE/kustomization.yml + sed -i.bak "s/Run-AntreaAgent.ps1/Run-AntreaAgent-Containerd.ps1/g" $BASE/kustomization.yml + sed -i.bak "/name: antrea-agent-windows/i\ - conf/Install-WindowsCNI-Containerd.ps1" $BASE/kustomization.yml +fi + +$KUSTOMIZE edit add base $BASE + if [ "$MODE" == "dev" ]; then $KUSTOMIZE edit set image antrea-windows=antrea/antrea-windows:latest $KUSTOMIZE edit add patch --path imagePullPolicy.yml @@ -123,6 +135,11 @@ fi $KUSTOMIZE build +if [ "$RUNTIME" == "containerd" ]; then + rm $BASE/kustomization.yml + mv $BASE/kustomization.yml.bak $BASE/kustomization.yml +fi + popd > /dev/null if $KEEP; then diff --git a/hack/release/prepare-assets.sh b/hack/release/prepare-assets.sh index 5e437371aa2..d1a2214ed49 100755 --- a/hack/release/prepare-assets.sh +++ b/hack/release/prepare-assets.sh @@ -113,6 +113,7 @@ export IMG_NAME=projects.registry.vmware.com/antrea/octant-antrea-ubuntu export IMG_NAME=projects.registry.vmware.com/antrea/antrea-windows ./hack/generate-manifest-windows.sh --mode release > "$OUTPUT_DIR"/antrea-windows.yml +./hack/generate-manifest-windows.sh --mode release --containerd > "$OUTPUT_DIR"/antrea-windows-containerd.yml export IMG_NAME=projects.registry.vmware.com/antrea/flow-aggregator ./hack/generate-manifest-flow-aggregator.sh --mode release > "$OUTPUT_DIR"/flow-aggregator.yml diff --git a/test/e2e/traceflow_test.go b/test/e2e/traceflow_test.go index a3479ceaab1..0288f53af8d 100644 --- a/test/e2e/traceflow_test.go +++ 
b/test/e2e/traceflow_test.go @@ -103,6 +103,9 @@ func testTraceflowIntraNodeANP(t *testing.T, data *TestData) { node1 := nodeName(nodeIdx) node1Pods, _, node1CleanupFn := createTestAgnhostPods(t, data, 3, data.testNamespace, node1) defer node1CleanupFn() + // Give a little time for Windows Containerd Nodes to setup OVS. + // Containerd configures port asynchronously, which could cause execution time of installing flow longer than docker. + time.Sleep(time.Second * 1) var denyIngress *v1alpha1.NetworkPolicy denyIngressName := "test-anp-deny-ingress" @@ -300,6 +303,9 @@ func testTraceflowIntraNode(t *testing.T, data *TestData) { agentPod, _ := data.getAntreaPodOnNode(node1) node1Pods, node1IPs, node1CleanupFn := createTestAgnhostPods(t, data, 3, data.testNamespace, node1) defer node1CleanupFn() + // Give a little time for Windows Containerd Nodes to setup OVS. + // Containerd configures port asynchronously, which could cause execution time of installing flow longer than docker. + time.Sleep(time.Second * 1) var pod0IPv4Str, pod1IPv4Str, dstPodIPv4Str, dstPodIPv6Str string if node1IPs[0].ipv4 != nil { pod0IPv4Str = node1IPs[0].ipv4.String() @@ -2012,6 +2018,9 @@ func testTraceflowExternalIP(t *testing.T, data *TestData) { nodeIP := nodeIP(nodeIdx) podNames, _, cleanupFn := createTestAgnhostPods(t, data, 1, data.testNamespace, node) defer cleanupFn() + // Give a little time for Windows Containerd Nodes to setup OVS. + // Containerd configures port asynchronously, which could cause execution time of installing flow longer than docker. + time.Sleep(time.Second * 1) testcase := testcase{ name: "nodeIPDestination",