Skip to content

Commit

Permalink
ci: validate pods and systemd-networkd restart for PRs (#1909)
Browse files Browse the repository at this point in the history
* update script to check cns in memory and add to pr pipeline

* adding stage to both overlay and podsubnet cilium stages

* add exit case if privileged pod is not found

* check status of priv pod

* call ds status before exit

* install cilium ds with kubectl and not helm for systemd-networkd initcontainer patch

* upload cilium ds

* adding files for cilium-agent and cilium-operator deployment

* update cilium ds

* addressing comments
  • Loading branch information
camrynl authored and jpayne3506 committed Sep 11, 2023
1 parent 64bfb38 commit 5949700
Show file tree
Hide file tree
Showing 12 changed files with 983 additions and 12 deletions.
17 changes: 12 additions & 5 deletions .pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,13 @@ steps:
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl cluster-info
kubectl get po -owide -A
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
echo "deploy Cilium ConfigMap"
kubectl apply -f cilium/configmap.yaml
kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml
echo "install Cilium"
helm repo add cilium https://helm.cilium.io/
helm install cilium cilium/cilium --version 1.12.5 --namespace kube-system -f cilium/cilium_helm_values.yaml
kubectl apply -f test/integration/manifests/cilium/cilium-agent
kubectl apply -f test/integration/manifests/cilium/cilium-operator
kubectl get po -owide -A
name: "installCilium"
displayName: "Install Cilium"
Expand Down Expand Up @@ -111,6 +110,14 @@ steps:
pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output"
condition: always()

- script: |
echo "validate pod IP assignment and check systemd-networkd restart"
kubectl apply -f hack/manifests/hostprocess.yaml
kubectl get pod -owide -A
bash hack/scripts/validate_state.sh
name: "validatePods"
displayName: "Validate Pods"
- script: |
echo "Run Service Conformance E2E"
export PATH=${PATH}:/usr/local/bin/gsutil
Expand Down
17 changes: 12 additions & 5 deletions .pipelines/singletenancy/overlay/overlay-e2e-step-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,13 @@ steps:
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl cluster-info
kubectl get po -owide -A
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
echo "deploy Cilium ConfigMap"
kubectl apply -f cilium/configmap.yaml
kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml
echo "install Cilium onto Overlay Cluster"
helm repo add cilium https://helm.cilium.io/
helm install cilium cilium/cilium --version 1.12.5 --namespace kube-system -f cilium/cilium_helm_values.yaml
kubectl apply -f test/integration/manifests/cilium/cilium-agent
kubectl apply -f test/integration/manifests/cilium/cilium-operator
kubectl get po -owide -A
name: "installCilium"
displayName: "Install Cilium on AKS Overlay"
Expand Down Expand Up @@ -117,6 +116,14 @@ steps:
pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output"
condition: always()

- script: |
echo "validate pod IP assignment and check systemd-networkd restart"
kubectl apply -f hack/manifests/hostprocess.yaml
kubectl get pod -owide -A
bash hack/scripts/validate_state.sh
name: "validatePods"
displayName: "Validate Pods"
- script: |
echo "Run Service Conformance E2E"
export PATH=${PATH}:/usr/local/bin/gsutil
Expand Down
36 changes: 34 additions & 2 deletions hack/scripts/validate_state.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ do
echo "Node internal ip: $node_ip"
privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}')
echo "privileged pod : $privileged_pod"
if [ "$privileged_pod" == '' ]; then
kubectl describe daemonset privileged-daemonset -n kube-system
exit 1
fi
while ! [ -s "azure_endpoints.json" ]
do
echo "trying to get the azure_endpoints"
Expand All @@ -34,6 +38,16 @@ do
sleep 10
done

cns_pod=$(kubectl get pod -l k8s-app=azure-cns -n kube-system -o wide | grep "$node_name" | awk '{print $1}')
echo "azure-cns pod : $cns_pod"

while ! [ -s "cns_endpoints.json" ]
do
echo "trying to get the cns_endpoints"
kubectl exec -it "$cns_pod" -n kube-system -- curl localhost:10090/debug/ipaddresses -d '{"IPConfigStateFilter":["Assigned"]}' > cns_endpoints.json
sleep 10
done

total_pods=$(kubectl get pods --all-namespaces -o wide --field-selector spec.nodeName="$node_name",status.phase=Running --output json)

echo "Checking if there are any pods with no ips"
Expand All @@ -60,7 +74,7 @@ do
echo "Number of azure endpoint ips : $num_of_azure_endpoint_ips"

if [ "$num_of_pod_ips" != "$num_of_azure_endpoint_ips" ]; then
printf "Error: Number of pods in running state is less than total ips in the azure ednpoint file" >&2
printf "Error: Number of pods in running state is less than total ips in the azure endpoint file" >&2
exit 1
fi

Expand Down Expand Up @@ -92,7 +106,25 @@ do
fi
done

num_of_cns_endpoints=$(cat cns_endpoints.json | jq -r '[.IPConfigurationStatus | .[] | select(.IPAddress != null)] | length')
cns_endpoint_ips=$(cat cns_endpoints.json | jq -r '(.IPConfigurationStatus | .[] | select(.IPAddress != null) | .IPAddress)')
echo "Number of cns endpoints: $num_of_cns_endpoints"

if [ "$num_of_pod_ips" != "$num_of_cns_endpoints" ]; then
printf "Error: Number of pods in running state is less than total ips in the cns endpoint file" >&2
exit 1
fi

for ip in "${pod_ips[@]}"
do
find_in_array "$cns_endpoint_ips" "$ip" "cns_endpoints.json"
if [[ $? -eq 1 ]]; then
printf "Error: %s Not found in the cns_endpoints.json" "$ip" >&2
exit 1
fi
done

#We are restarting systemd-networkd and checking that connectivity works after the restart. For more details: https://github.com/cilium/cilium/issues/18706
kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"
rm -rf cilium_endpoints.json azure_endpoints.json
rm -rf cilium_endpoints.json azure_endpoints.json cns_endpoints.json
done
95 changes: 95 additions & 0 deletions test/integration/manifests/cilium/cilium-agent/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# ClusterRole for the cilium-agent DaemonSet. Grants the agent read access to
# core/networking objects it watches, plus write access to the Cilium CRDs it
# owns. Applied via kubectl (test/integration/manifests/cilium/cilium-agent).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cilium
rules:
# Read Kubernetes NetworkPolicies so the agent can enforce them.
- apiGroups:
  - networking.k8s.io
  resources:
  - networkpolicies
  verbs:
  - get
  - list
  - watch
# Read EndpointSlices for service load-balancing backends.
- apiGroups:
  - discovery.k8s.io
  resources:
  - endpointslices
  verbs:
  - get
  - list
  - watch
# Read core objects the agent tracks for identity/endpoint management.
- apiGroups:
  - ""
  resources:
  - namespaces
  - services
  - pods
  - endpoints
  - nodes
  verbs:
  - get
  - list
  - watch
# Watch CRD registrations (Cilium waits for its own CRDs to be installed).
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - list
  - watch
  # This is used when validating policies in preflight. This will need to stay
  # until we figure out how to avoid "get" inside the preflight, and then
  # should be removed ideally.
  - get
# Watch all Cilium custom resources cluster-wide.
- apiGroups:
  - cilium.io
  resources:
  - ciliumbgploadbalancerippools
  - ciliumbgppeeringpolicies
  - ciliumclusterwideenvoyconfigs
  - ciliumclusterwidenetworkpolicies
  - ciliumegressgatewaypolicies
  - ciliumegressnatpolicies
  - ciliumendpoints
  - ciliumendpointslices
  - ciliumenvoyconfigs
  - ciliumidentities
  - ciliumlocalredirectpolicies
  - ciliumnetworkpolicies
  - ciliumnodes
  verbs:
  - list
  - watch
# Create the Cilium resources the agent owns for its local node/workloads.
- apiGroups:
  - cilium.io
  resources:
  - ciliumidentities
  - ciliumendpoints
  - ciliumnodes
  verbs:
  - create
# Manage the lifecycle of CiliumEndpoints for local pods.
- apiGroups:
  - cilium.io
  resources:
  - ciliumendpoints
  verbs:
  - delete
  - get
# Update the agent's own CiliumNode object and its status.
- apiGroups:
  - cilium.io
  resources:
  - ciliumnodes
  - ciliumnodes/status
  verbs:
  - get
  - update
# Patch status subresources (policy realization state, endpoint state).
- apiGroups:
  - cilium.io
  resources:
  - ciliumnetworkpolicies/status
  - ciliumclusterwidenetworkpolicies/status
  - ciliumendpoints/status
  - ciliumendpoints
  verbs:
  - patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Binds the "cilium" ClusterRole to the cilium-agent ServiceAccount in
# kube-system, granting the DaemonSet pods the permissions defined above.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cilium
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cilium
subjects:
- kind: ServiceAccount
  name: "cilium"
  namespace: kube-system
Loading

0 comments on commit 5949700

Please sign in to comment.