Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change: startup parameters of cilium-agent #125

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions policy/cilium/01-overwrite-endpoint-when-conflicting.patch
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
From 0317be58930a32d28a7e4971fb6ae85d007ac4e2 Mon Sep 17 00:00:00 2001
From f02e30ee9c3eb24625f27c4a815f512c3136e241 Mon Sep 17 00:00:00 2001
From: Lyt99 <[email protected]>
Date: Thu, 30 Jul 2020 14:26:07 +0800
Subject: [PATCH] overwrite endpoint when conflicting

---
daemon/cmd/endpoint.go | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
daemon/cmd/endpoint.go | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/daemon/cmd/endpoint.go b/daemon/cmd/endpoint.go
index ee81aeede..8185f69d1 100644
index ee81aeede..671cab315 100644
--- a/daemon/cmd/endpoint.go
+++ b/daemon/cmd/endpoint.go
@@ -343,7 +343,8 @@ func (d *Daemon) createEndpoint(ctx context.Context, owner regeneration.Owner, e
@@ -343,7 +343,9 @@ func (d *Daemon) createEndpoint(ctx context.Context, owner regeneration.Owner, e
if err != nil {
return invalidDataError(ep, err)
} else if oldEp != nil {
- return invalidDataError(ep, fmt.Errorf("IP %s is already in use", id))
+ d.deleteEndpoint(ep)
+ oldEp.Logger("api").Warning("endpoint conflicted, deleting")
+ d.deleteEndpoint(oldEp)
+ // return invalidDataError(ep, fmt.Errorf("IP %s is already in use", id))
}
}

--
2.27.0
2.28.0

3 changes: 2 additions & 1 deletion policy/policyinit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ if [ -f "/etc/cni/net.d/10-terway.conflist" ]; then
echo "using cilium as network routing & policy"
exec cilium-agent --tunnel=disabled --masquerade=false --enable-ipv6=false --enable-policy=$ENABLE_POLICY \
--agent-health-port=9099 --disable-envoy-version-check=true \
--enable-local-node-route=false --ipv4-range=169.254.0.0/16
--enable-local-node-route=false --ipv4-range=169.254.10.0/30 \
--ipam=cluster-pool --bpf-map-dynamic-size-ratio=0.0025
fi

# default for veth
Expand Down
6 changes: 3 additions & 3 deletions terway-cilium.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ spec:
initContainers:
- name: terway-init
image: registry.cn-hangzhou.aliyuncs.com/acs/terway:v1.0.10.214-ge008649-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
command:
Expand All @@ -171,7 +171,7 @@ spec:
containers:
- name: terway
image: registry.cn-hangzhou.aliyuncs.com/acs/terway:v1.0.10.214-ge008649-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
command: ['/usr/bin/terwayd', '-log-level', 'debug', '-daemon-mode', 'ENIMultiIP']
securityContext:
privileged: true
Expand Down Expand Up @@ -201,7 +201,7 @@ spec:
name: device-plugin-path
- name: policy
image: registry.cn-hangzhou.aliyuncs.com/acs/terway:v1.0.10.214-ge008649-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
command: ["/bin/policyinit.sh"]
env:
- name: NODENAME
Expand Down
6 changes: 3 additions & 3 deletions terway-multiip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ spec:
initContainers:
- name: terway-init
image: registry.aliyuncs.com/acs/terway:v1.0.10.122-gd0be015-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
command:
Expand All @@ -142,7 +142,7 @@ spec:
containers:
- name: terway
image: registry.aliyuncs.com/acs/terway:v1.0.10.122-gd0be015-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
command: ['/usr/bin/terwayd', '-log-level', 'debug', '-daemon-mode', 'ENIMultiIP']
securityContext:
privileged: true
Expand Down Expand Up @@ -172,7 +172,7 @@ spec:
name: device-plugin-path
- name: policy
image: registry.aliyuncs.com/acs/terway:v1.0.10.122-gd0be015-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
command: ["/bin/policyinit.sh"]
env:
- name: NODENAME
Expand Down
6 changes: 3 additions & 3 deletions terway.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ spec:
initContainers:
- name: terway-init
image: registry.aliyuncs.com/acs/terway:v1.0.10.122-gd0be015-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
command: ['sh', '-c', 'cp /usr/bin/terway /opt/cni/bin/; chmod +x /opt/cni/bin/terway; cp /etc/eni/10-terway.conf /etc/cni/net.d/; modprobe sch_htb || true']
Expand All @@ -126,7 +126,7 @@ spec:
containers:
- name: terway
image: registry.aliyuncs.com/acs/terway:v1.0.10.122-gd0be015-aliyun
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
env:
Expand Down Expand Up @@ -156,7 +156,7 @@ spec:
- name: policy
image: registry.aliyuncs.com/acs/terway:v1.0.10.122-gd0be015-aliyun
command: ["/bin/policyinit.sh"]
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
env:
- name: NODENAME
valueFrom:
Expand Down
23 changes: 23 additions & 0 deletions tests/auto_network_policy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/sh

# Runs the network policy bats suite in an endless loop, sleeping
# TEST_INTERVAL seconds between runs, and posts an alert to the DingTalk
# robot webhook whenever a run fails.
#
# Environment:
#   TOKEN  access token appended to the DingTalk webhook URL.
TEST_INTERVAL=300

# notify_failure posts the failure message to the DingTalk robot.
# Returns non-zero when TOKEN is missing or curl reports an error.
notify_failure() {
    if [ -z "${TOKEN:-}" ]; then
        echo "TOKEN is not set, skip sending failure notification" >&2
        return 1
    fi

    # --max-time keeps a stalled webhook from blocking the test loop forever.
    curl -sS --max-time 30 -X POST "https://oapi.dingtalk.com/robot/send?access_token=$TOKEN" -H 'cache-control: no-cache' -H 'content-type: application/json' -d '{
        "msgtype": "text",
        "text": {
            "content": "terway network policy test failed!"
        }
    }'
}

while true
do
    echo "Begin test at $(date)"

    if bats network_policy.bats; then
        echo "Test succeed at $(date)"
    else
        notify_failure || echo "Failed to send failure notification at $(date)" >&2
    fi

    sleep "$TEST_INTERVAL"
done
24 changes: 24 additions & 0 deletions tests/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ function pod_running() {
false
}

# pods_all_running succeeds when `kubectl get <args> --no-headers` exits with
# status 0, lists at least one line, and every listed line contains "Running".
# Otherwise it prints a diagnostic and fails.
#
# Arguments: passed through unchanged to `kubectl get`
#            (e.g. `pod -l app=nginx-test`).
# Relies on bats' `run`, which fills in $status, $output and $lines.
function pods_all_running() {
    run kubectl get "$@" --no-headers
    if [[ "$status" -eq 0 ]] && [[ ${#lines[@]} -gt 0 ]]; then
        local running
        local all
        # grep -c exits 1 when nothing matches (it still prints 0); `|| true`
        # keeps that from aborting the helper when errexit is active, so the
        # diagnostic below is always reached.
        running=$(echo "$output" | grep -c "Running" || true)
        all=$(echo "$output" | wc -l)
        if [[ "$running" -eq "$all" ]]; then
            return 0
        fi
    fi
    echo "object $* not ready, status: $status, lines: ${#lines[@]} output $output"
    false
}

function object_not_exist() {
run kubectl get $@
if [[ "$status" -gt 0 ]] || [[ ${#lines[@]} -eq 1 ]]; then
Expand All @@ -74,6 +89,15 @@ function loadbalancer_ready() {
false
}

# deployment_ready succeeds when the object fetched by
# `kubectl get <args> -o json` reports as many ready replicas as replicas.
# Otherwise it prints a diagnostic and fails.
#
# Arguments: passed through unchanged to `kubectl get`
#            (e.g. `deployment nginx-deployment`).
# Relies on bats' `run`, which fills in $status, $output and $lines.
function deployment_ready() {
    run kubectl get "$@" -o json
    # The observedGeneration guard rejects an object whose status has not been
    # written for the current spec yet; without it an empty status compares
    # null == null and the deployment looks ready right after it is applied.
    # jq -e turns the boolean result into the exit status, so nothing is
    # printed on success.
    if [[ "$status" -eq 0 ]] && [[ ${#lines[@]} -gt 1 ]] \
        && printf '%s\n' "$output" \
            | jq -e '(.status.observedGeneration // 0) >= (.metadata.generation // 0) and .status.replicas == .status.readyReplicas' > /dev/null; then
        return 0
    fi
    echo "deployment $* not ready, status: $status, lines: ${#lines[@]}"
    false
}

# Prepare curl operation
function prepare_curl_options() {
if [ x"$DOCKER_TLS_VERIFY" = x"1" ]; then
Expand Down
22 changes: 22 additions & 0 deletions tests/stress/auto_startup_time.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Runs the startup time bats suite in an endless loop, sleeping TEST_INTERVAL
# seconds between runs, and posts an alert to the DingTalk robot webhook
# whenever a run fails.
#
# Environment:
#   TOKEN  access token appended to the DingTalk webhook URL.
TEST_INTERVAL=600

# notify_failure posts the failure message to the DingTalk robot.
# Returns non-zero when TOKEN is missing or curl reports an error.
notify_failure() {
    if [ -z "${TOKEN:-}" ]; then
        echo "TOKEN is not set, skip sending failure notification" >&2
        return 1
    fi

    # --max-time keeps a stalled webhook from blocking the test loop forever.
    curl -sS --max-time 30 -X POST "https://oapi.dingtalk.com/robot/send?access_token=$TOKEN" -H 'cache-control: no-cache' -H 'content-type: application/json' -d '{
        "msgtype": "text",
        "text": {
            "content": "terway startup time test failed!"
        }
    }'
}

while true
do
    echo "Begin test at $(date)"

    if bats startup_time.bats; then
        echo "Test succeed at $(date)"
    else
        notify_failure || echo "Failed to send failure notification at $(date)" >&2
    fi

    sleep "$TEST_INTERVAL"
done
23 changes: 23 additions & 0 deletions tests/stress/service.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash

# Creates a deployment with several pods and SERVICE_TOTAL service(s).
# see: ../templates/testcases/stress/service.yml && ../templates/testcases/stress/nginx-pod-service.yml
#
# The template paths are relative to the current working directory.
#
# Environment:
#   SERVICE_TOTAL  number of services to create (default: 2000).

SERVICE_TEMPLATE=../templates/testcases/stress/service.yml

# Stop early when the template cannot be read; otherwise every iteration
# below would pipe an empty document into kubectl.
SERVICE_YAML=$(cat "$SERVICE_TEMPLATE") || {
    echo "cannot read service template $SERVICE_TEMPLATE" >&2
    exit 1
}
SERVICE_TOTAL=${SERVICE_TOTAL:-2000}

# generate_service prints the service template with the first occurrence of
# the SERVICENAME placeholder replaced by "nginx-service-<index>".
#   $1: index of the service
generate_service() {
    local name
    name="nginx-service-$1"
    echo "${SERVICE_YAML/SERVICENAME/$name}"
}

# apply deployment
kubectl apply -f ../templates/testcases/stress/nginx-pod-service.yml

# apply service
for (( i=0; i<SERVICE_TOTAL; i=i+1 )); do
    echo "Apply service $i"
    generate_service "$i" | kubectl apply -f '-'
done

65 changes: 65 additions & 0 deletions tests/stress/startup_time.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bats
load ../helpers

# This testcase for measuring the deployment time of pods for large scale cluster
# see: templates/testcases/stress/nginx-pod.yml

# get_interval prints the number of seconds between the lastTransitionTime of
# a pod's "Initialized" condition and that of its "Ready" condition.
#   $1: object to query, as accepted by `kubectl get` (e.g. pod/<name>)
# Returns non-zero without printing an interval when the pod's "Ready"
# condition is not "True", since the Ready transition time would then not mark
# the moment the pod became ready.
# NOTE: `date --date` is the GNU coreutils form.
function get_interval() {
    # check pod ready
    local ready
    ready=$(kubectl get "$1" -o jsonpath='{range .status.conditions[?(@.type == "Ready")]}{.status}{end}')
    if [ "$ready" != "True" ]; then
        echo "$1 is not ready (Ready condition: '$ready')" >&2
        return 1
    fi

    local init_time
    local ready_time

    init_time=$(kubectl get "$1" -o jsonpath='{range .status.conditions[?(@.type == "Initialized")]}{.lastTransitionTime}{end}')
    ready_time=$(kubectl get "$1" -o jsonpath='{range .status.conditions[?(@.type == "Ready")]}{.lastTransitionTime}{end}')
    # convert both timestamps to seconds since the epoch
    init_time=$(date --date "$init_time" "+%s")
    ready_time=$(date --date "$ready_time" "+%s")

    echo $((ready_time - init_time))
}

# executed before each test: makes sure the log directory exists and removes
# any deployment left over from a previous run
setup() {
    # make log dir; -p is silent when it already exists but still fails when
    # the directory cannot be created
    mkdir -p logs
    # clean deployment (best effort: it may not exist)
    kubectl delete deployment nginx-deployment || true
    # wait until no pod labelled app=nginx-test is listed any more
    retry 30 5 object_not_exist pod -l app=nginx-test
}

# Deploys the nginx test deployment, waits for its pods, then writes every
# pod's Initialized-to-Ready interval plus a summary line to a timestamped
# file under logs/.
@test "startup pod" {
    # apply deployment, with name "nginx-deployment" and pod label "app=nginx-test"
    kubectl apply -f ../templates/testcases/stress/nginx-pod.yml
    # wait for all pods ready
    retry 20 5 deployment_ready deployment nginx-deployment
    retry 20 3 pods_all_running pod -l app=nginx-test

    # get intervals
    local file_name
    file_name="logs/startup_time_$(date "+%m%d.%H-%M-%S").log"
    local count=0
    local max=0
    local min=0
    local total=0
    local interval
    local pod

    for pod in $(kubectl get pod -l app=nginx-test --field-selector="status.phase=Running" -o name)
    do
        interval=$(get_interval "$pod")

        echo "$interval $pod" >> "$file_name" # <interval> <pod_name>

        # the first pod seeds min, so no magic upper bound is needed
        if [ "$count" -eq 0 ] || [ "$interval" -lt "$min" ]; then min=$interval; fi
        if [ "$interval" -gt "$max" ]; then max=$interval; fi
        total=$((total + interval))
        count=$((count + 1))
    done

    # without any running pod there is nothing to average
    if [ "$count" -eq 0 ]; then
        echo "no running pod with label app=nginx-test found"
        return 1
    fi
    local avg=$((total / count))

    echo "total: $count, min: $min, max: $max, avg: $avg" >> "$file_name"
}

6 changes: 3 additions & 3 deletions tests/templates/testcases/network_policy/network-policy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ spec:
initContainers:
- name: waiting
image: registry.aliyuncs.com/acs/alpine:3.6
command: ["sh", "-c", "for i in `seq 1 120`; do nslookup policy-svc && nslookup non-policy-svc && wget -T 1 non-policy-svc && exit 0; sleep 1; done"]
command: ["sh", "-c", "for i in `seq 1 120`; do nslookup policy-svc && nslookup non-policy-svc && wget -T 1 non-policy-svc && sleep 30 && exit 0; sleep 1; done"]
containers:
- name: policy
image: registry.aliyuncs.com/acs/alpine:3.6
Expand Down Expand Up @@ -147,11 +147,11 @@ spec:
initContainers:
- name: waiting
image: registry.aliyuncs.com/acs/alpine:3.6
command: ["sh", "-c", "for i in `seq 1 120`; do nslookup policy-svc && nslookup non-policy-svc && wget -T 1 non-policy-svc && exit 0; sleep 1; done"]
command: ["sh", "-c", "for i in `seq 1 120`; do nslookup policy-svc && nslookup non-policy-svc && wget -T 1 non-policy-svc && sleep 30 && exit 0; sleep 1; done"]
containers:
- name: policy
image: registry.aliyuncs.com/acs/alpine:3.6
command: ["sh", "-c", "wget -T 1 policy-svc"]
- name: non-policy
image: registry.aliyuncs.com/acs/alpine:3.6
command: ["sh", "-c", "wget -T 1 non-policy-svc"]
command: ["sh", "-c", "wget -T 1 non-policy-svc"]
58 changes: 58 additions & 0 deletions tests/templates/testcases/stress/nginx-pod-service.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Namespace holding every object of the service stress test.
apiVersion: v1
kind: Namespace
metadata:
  name: stress-service

---

# Minimal nginx configuration: one server on port 80 that answers
# /healthz with 200 "ok" (the path used by the readiness probe below).
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-configmap
  namespace: stress-service
data:
  nginx.conf: |
    worker_processes auto;
    events {
    }
    http {
      server {
        server_name nginx;
        listen 80 default_server;
        location /healthz {
          return 200 "ok";
        }
      }
    }

---

# nginx pods labelled app=nginx-test.
# apps/v1 replaces extensions/v1beta1, which is no longer served for
# Deployments since Kubernetes 1.16; apps/v1 requires an explicit selector.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
  namespace: stress-service
spec:
  replicas: 50
  selector:
    matchLabels:
      app: nginx-test
  template:
    metadata:
      labels:
        app: nginx-test
    spec:
      volumes:
      - name: nginx
        configMap:
          name: nginx-configmap
      containers:
      - name: nginx
        image: registry.aliyuncs.com/acs-sample/nginx
        volumeMounts:
        - name: nginx
          mountPath: /etc/nginx
          readOnly: true
        readinessProbe:
          httpGet:
            path: /healthz
            port: 80
          initialDelaySeconds: 0
          periodSeconds: 1
Loading