diff --git a/.github/workflows/cni-plugin-integration.yml b/.github/workflows/cni-plugin-integration.yml new file mode 100644 index 00000000..b1f87622 --- /dev/null +++ b/.github/workflows/cni-plugin-integration.yml @@ -0,0 +1,22 @@ +name: cni-plugin-integration + +on: + workflow_dispatch: + pull_request: + paths: + - Dockerfile-cni-plugin + - cni-plugin/integration/flannel/Dockerfile-tester + - cni-plugin/integration/run.sh + - cni-plugin/** + +jobs: + integration: + timeout-minutes: 15 + runs-on: ubuntu-20.04 + steps: + - uses: linkerd/dev/actions/setup-tools@v38 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b + - run: just k3d-create + - run: just build-cni-plugin-image + - run: just build-cni-plugin-test-image + - run: just cni-plugin-test-integration diff --git a/cni-plugin/integration/Dockerfile-tester b/cni-plugin/integration/Dockerfile-tester new file mode 100644 index 00000000..0c8e5bfb --- /dev/null +++ b/cni-plugin/integration/Dockerfile-tester @@ -0,0 +1,18 @@ +# syntax=docker/dockerfile:1.4 +# +# A single container holds all of the test code and it must be +# specified in `run.sh` which tests you want to run. +# +# There's no ENTRYPOINT as integration test runners will require +# two things: +# 1) a specific k3d cluster configured with CNI +# 2) a test suite (e.g. `flannel.go`) runs with a configured CNI plugin. + +FROM golang:1.18-alpine AS build +ENV GOCACHE=/tmp/ +WORKDIR /src +COPY --link go.mod go.sum . 
+COPY --link cni-plugin cni-plugin +COPY --link internal internal +COPY --link proxy-init proxy-init +RUN go mod tidy && go mod download diff --git a/cni-plugin/integration/flannel/flannel_test.go b/cni-plugin/integration/flannel/flannel_test.go new file mode 100644 index 00000000..2c732002 --- /dev/null +++ b/cni-plugin/integration/flannel/flannel_test.go @@ -0,0 +1,91 @@ +package flannel + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "testing" +) + +const ( + ConfigDirectory = "/var/lib/rancher/k3s/agent/etc/cni/net.d" + FlannelConflist = "10-flannel.conflist" +) + +// Given a directory, return a map of filename->struct{} +func files(directory string) (map[string]struct{}, error) { + files, err := os.ReadDir(directory) + if err != nil { + return nil, err + } + + fileNames := make(map[string]struct{}, len(files)) + for _, f := range files { + fileNames[f.Name()] = struct{}{} + } + + return fileNames, nil +} + +func TestMain(m *testing.M) { + runTests := flag.Bool("integration-tests", false, "must be provided to run the integration tests") + flag.Parse() + + if !*runTests { + fmt.Fprintln(os.Stderr, "integration tests not enabled: enable with -integration-tests") + os.Exit(0) + } + + os.Exit(m.Run()) +} + +// TODO(stevej): this could be a test helper as we want it to be true for every CNI integration +func TestLinkerdIsLastCNIPlugin(t *testing.T) { + t.Parallel() + + t.Run("succeeds when linkerd-cni is the last plugin", func(t *testing.T) { + if _, err := os.Stat(ConfigDirectory); os.IsNotExist(err) { + t.Fatalf("Directory does not exist. 
Check if volume mount exists: %s", ConfigDirectory) + } + + filenames, err := files(ConfigDirectory) + + if err != nil { + t.Fatalf("unable to read files from directory %s due to error: %v", ConfigDirectory, err) + } + + if len(filenames) == 0 { + t.Fatalf("no files found in %s", ConfigDirectory) + } + + if len(filenames) > 2 { + t.Fatalf("too many files found in %s: %s ", ConfigDirectory, filenames) + } + + if _, ok := filenames[FlannelConflist]; !ok { + t.Fatalf("filenames does not contain %s, instead it contains: %s", FlannelConflist, filenames) + } + + conflistFile, err := os.ReadFile(ConfigDirectory + "/" + FlannelConflist) + if err != nil { + t.Fatalf("could not read %s: %v", FlannelConflist, err) + } + + var conflist map[string]any + err = json.Unmarshal(conflistFile, &conflist) + if err != nil { + t.Fatalf("unmarshaling conflist json failed: %v", err) + } + + if conflist["cniVersion"] != "1.0.0" { + t.Fatalf("expected cniVersion 1.0.0, instead saw %s", conflistFile) + } + + plugins := conflist["plugins"].([]interface{}) + lastPlugin := plugins[len(plugins)-1].(map[string]any) + if lastPlugin["name"] != "linkerd-cni" { + t.Fatalf("linkerd-cni was not last in the plugins list") + } + }) +} diff --git a/cni-plugin/integration/manifests/linkerd-cni.yaml b/cni-plugin/integration/manifests/linkerd-cni.yaml new file mode 100644 index 00000000..b9554030 --- /dev/null +++ b/cni-plugin/integration/manifests/linkerd-cni.yaml @@ -0,0 +1,177 @@ +## +## Everything below here is generated from the output `linkerd install-cni` +## and modified with the test image of the cni-plugin. +## +## `linkerd install-cni \ +## --dest-cni-net-dir "/var/lib/rancher/k3s/agent/etc/cni/net.d/" \ +## --dest-cni-bin-dir "/bin"` +## These flags are meant to enable cni to work properly with k3d/k3s. +## Also the log level is set to debug to simplify development. +## +## DO NOT hand edit. 
+## +kind: Namespace +apiVersion: v1 +metadata: + name: linkerd-cni + labels: + linkerd.io/cni-resource: "true" + config.linkerd.io/admission-webhooks: disabled +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: linkerd-cni + namespace: linkerd-cni + labels: + linkerd.io/cni-resource: "true" +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: linkerd-cni + labels: + linkerd.io/cni-resource: "true" +rules: +- apiGroups: [""] + resources: ["pods", "nodes", "namespaces", "services"] + verbs: ["list", "get", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: linkerd-cni + labels: + linkerd.io/cni-resource: "true" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: linkerd-cni +subjects: +- kind: ServiceAccount + name: linkerd-cni + namespace: linkerd-cni +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: linkerd-cni-config + namespace: linkerd-cni + labels: + linkerd.io/cni-resource: "true" +data: + dest_cni_net_dir: "/var/lib/rancher/k3s/agent/etc/cni/net.d" + dest_cni_bin_dir: "/bin" + # The CNI network configuration to install on each node. The special + # values in this config will be automatically populated. 
+ cni_network_config: |- + { + "name": "linkerd-cni", + "type": "linkerd-cni", + "log_level": "debug", + "policy": { + "type": "k8s", + "k8s_api_root": "https://__KUBERNETES_SERVICE_HOST__:__KUBERNETES_SERVICE_PORT__", + "k8s_auth_token": "__SERVICEACCOUNT_TOKEN__" + }, + "kubernetes": { + "kubeconfig": "__KUBECONFIG_FILEPATH__" + }, + "linkerd": { + "incoming-proxy-port": 4143, + "outgoing-proxy-port": 4140, + "proxy-uid": 2102, + "ports-to-redirect": [], + "inbound-ports-to-ignore": ["4191","4190"], + "simulate": false, + "use-wait-flag": false + } + } +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: linkerd-cni + namespace: linkerd-cni + labels: + k8s-app: linkerd-cni + linkerd.io/cni-resource: "true" + annotations: + linkerd.io/created-by: linkerd/cli edge-22.12.1 +spec: + selector: + matchLabels: + k8s-app: linkerd-cni + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + k8s-app: linkerd-cni + annotations: + linkerd.io/created-by: linkerd/cli edge-22.12.1 + linkerd.io/cni-resource: "true" + linkerd.io/inject: disabled + spec: + tolerations: + - operator: Exists + nodeSelector: + kubernetes.io/os: linux + hostNetwork: true + serviceAccountName: linkerd-cni + containers: + # This container installs the linkerd CNI binaries + # and CNI network config file on each node. The install + # script copies the files into place and then sleeps so + # that Kubernetes doesn't keep trying to restart it. 
+ - name: install-cni + #image: test.l5d.io/linkerd/cni-plugin:test + image: cr.l5d.io/linkerd/cni-plugin:edge-22.12.1 + env: + - name: DEST_CNI_NET_DIR + valueFrom: + configMapKeyRef: + name: linkerd-cni-config + key: dest_cni_net_dir + - name: DEST_CNI_BIN_DIR + valueFrom: + configMapKeyRef: + name: linkerd-cni-config + key: dest_cni_bin_dir + - name: CNI_NETWORK_CONFIG + valueFrom: + configMapKeyRef: + name: linkerd-cni-config + key: cni_network_config + - name: SLEEP + value: "true" + lifecycle: + # In some edge-cases this helps ensure that cleanup() is called in the container's script + # https://github.com/linkerd/linkerd2/issues/2355 + preStop: + exec: + command: + - /bin/sh + - -c + - kill -15 1; sleep 15s + volumeMounts: + - mountPath: /host/bin + name: cni-bin-dir + - mountPath: /host/var/lib/rancher/k3s/agent/etc/cni/net.d + name: cni-net-dir + - mountPath: /tmp + name: linkerd-tmp-dir + securityContext: + readOnlyRootFilesystem: true + privileged: false + volumes: + - name: cni-bin-dir + hostPath: + path: /bin + - name: cni-net-dir + hostPath: + path: /var/lib/rancher/k3s/agent/etc/cni/net.d + - name: linkerd-tmp-dir + emptyDir: {} diff --git a/cni-plugin/integration/run.sh b/cni-plugin/integration/run.sh new file mode 100755 index 00000000..5e69c98a --- /dev/null +++ b/cni-plugin/integration/run.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +cd "${BASH_SOURCE[0]%/*}" + +# Run kubectl with the correct context. +function k() { + if [ -n "${TEST_CTX:-}" ]; then + kubectl --context="$TEST_CTX" "$@" + else + kubectl "$@" + fi +} + +function create_test_lab() { + echo '# Creating the test lab...' + k create ns cni-plugin-test + k create serviceaccount linkerd-cni + # TODO(stevej): how can we parameterize this manifest with `version` so we + # can enable a testing matrix? + k create -f manifests/linkerd-cni.yaml +} + +function cleanup() { + echo '# Cleaning up...' 
+ k delete -f manifests/linkerd-cni.yaml || echo "could not delete -f manifests/linkerd-cni.yaml" + k delete serviceaccount linkerd-cni || echo "could not delete serviceaccount linkerd-cni" + k delete ns cni-plugin-test || echo "could not delete namespace cni-plugin-test" +} + +trap cleanup EXIT + +if k get ns/cni-plugin-test >/dev/null 2>&1 ; then + echo 'ns/cni-plugin-test already exists' >&2 + exit 1 +fi + +create_test_lab + +# Wait for linkerd-cni daemonset to complete +if ! k rollout status --timeout=30s daemonset/linkerd-cni -n linkerd-cni; then + echo "!! linkerd-cni didn't rollout properly, printing logs"; + k describe ds linkerd-cni || echo "daemonset linkerd-cni not found" + k logs linkerd-cni -n linkerd-cni || echo "logs not found for linkerd-cni" + exit 1 +fi + +# TODO(stevej): we don't want to rely on a linkerd build in this repo, we +# can package network-validator separately. +echo '# Run the network validator...' +k run linkerd-proxy \ --attach \ -i \ --command \ --image="cr.l5d.io/linkerd/proxy:edge-22.12.1" \ --image-pull-policy=IfNotPresent \ --namespace=cni-plugin-test \ --restart=Never \ --rm \ -- \ /usr/lib/linkerd/linkerd2-network-validator --log-format plain \ --log-level debug --connect-addr 1.1.1.1:20001 \ --listen-addr 0.0.0.0:4140 --timeout 10s + +echo 'PASS: Network Validator' + +# the integration tests to run. pass in as an environment variable. +# defaults to the tests in the flannel subdirectory +SCENARIO=${SCENARIO-flannel} + +# This needs to use the name linkerd-proxy so that linkerd-cni will run. +echo '# Running tester...' 
+k run linkerd-proxy \ + --attach \ + --image="test.l5d.io/linkerd/cni-plugin-tester:test" \ + --image-pull-policy=Never \ + --namespace=cni-plugin-test \ + --restart=Never \ + --overrides="{ + \"apiVersion\": \"v1\", + \"spec\": { + \"containers\": [ + { + \"name\": \"linkerd-proxy\", + \"image\": \"test.l5d.io/linkerd/cni-plugin-tester:test\", + \"command\": [\"go\", \"test\", \"-v\", \"./cni-plugin/integration/${SCENARIO}...\", \"-integration-tests\"], + \"volumeMounts\": [ + { + \"mountPath\": \"/var/lib/rancher/k3s/agent/etc/cni/net.d\", + \"name\": \"cni-net-dir\" + } + ] + } + ], + \"volumes\": [ + { + \"name\": \"cni-net-dir\", + \"hostPath\": { + \"path\": \"/var/lib/rancher/k3s/agent/etc/cni/net.d\" + } + } + ] + }, + \"status\": {} + }" \ + --rm diff --git a/cni-plugin/main.go b/cni-plugin/main.go index 0910f93f..9df36d71 100644 --- a/cni-plugin/main.go +++ b/cni-plugin/main.go @@ -203,7 +203,7 @@ func cmdAdd(args *skel.CmdArgs) error { } if containsLinkerdProxy && !containsInitContainer { - logEntry.Debug("linkerd-cni: setting up iptables firewall") + logEntry.Debugf("linkerd-cni: setting up iptables firewall for %s/%s", namespace, pod) options := cmd.RootOptions{ IncomingProxyPort: conf.ProxyInit.IncomingProxyPort, OutgoingProxyPort: conf.ProxyInit.OutgoingProxyPort, @@ -309,13 +309,13 @@ func cmdAdd(args *skel.CmdArgs) error { } func cmdCheck(args *skel.CmdArgs) error { - logrus.Debug("linkerd-cni: cmdCheck not implemented") + logrus.Info("linkerd-cni: check called but not implemented") return nil } // cmdDel is called for DELETE requests func cmdDel(args *skel.CmdArgs) error { - logrus.Debug("linkerd-cni: cmdDel not implemented") + logrus.Info("linkerd-cni: delete called but not implemented") return nil } diff --git a/justfile b/justfile index fc44852c..5b3d0d98 100644 --- a/justfile +++ b/justfile @@ -5,6 +5,7 @@ proxy-init-image := "test.l5d.io/linkerd/proxy-init:test" _test-image := "test.l5d.io/linkerd/iptables-tester:test" cni-plugin-image := 
"test.l5d.io/linkerd/cni-plugin:test" +_cni-plugin-test-image := "test.l5d.io/linkerd/cni-plugin-tester:test" ## ## Recipes @@ -117,9 +118,15 @@ build-proxy-init-test-image *args='--load': {{ args }} ## -## CNI +## cni-plugin ## +cni-plugin-build: + go build -o target/linkerd2-cni-plugin ./cni-plugin + +cni-plugin-test-unit: + go test -v ./cni-plugin/... + # TODO(stevej): this does not run within the devcontainer cni-plugin-installer-integration-run: build-cni-plugin-image HUB=test.l5d.io/linkerd TAG=test go test -cover -v -mod=readonly ./cni-plugin/test/... -integration-tests @@ -130,12 +137,31 @@ build-cni-plugin-image *args='--load': --file=Dockerfile-cni-plugin \ --tag={{ cni-plugin-image }} \ {{ args }} -## -## Test cluster -## +# Build docker image for cni-plugin-tester (Development) +build-cni-plugin-test-image *args='--load': + docker buildx build . \ + --file=cni-plugin/integration/Dockerfile-tester \ + --tag={{ _cni-plugin-test-image }} \ + {{ args }} + +# Build and load images for cni-plugin +cni-plugin-test-integration-deps: build-cni-plugin-image build-cni-plugin-test-image _k3d-ready + @just-k3d import {{ _cni-plugin-test-image }} {{ cni-plugin-image }} + +# Run cni-plugin integration tests after preparing dependencies +# For new scenarios, add them after cni-plugin-test-integration-deps +cni-plugin-test-integration: cni-plugin-test-integration-deps cni-plugin-test-integration-flannel + +# Run flannel integration tests without preparing dependencies +cni-plugin-test-integration-flannel: + SCENARIO=flannel TEST_CTX="k3d-$(just-k3d --evaluate K3D_CLUSTER_NAME)" ./cni-plugin/integration/run.sh + + + +# TODO(stevej): add a k3d-create-debug export K3S_DISABLE := "local-storage,traefik,servicelb,metrics-server@server:*" -export K3D_CREATE_FLAGS := '--no-lb' +export K3D_CREATE_FLAGS := '--no-lb --k3s-arg "--debug@server:*"' # Creates a k3d cluster that can be used for testing. k3d-create: