diff --git a/contrib/rootless-cni-infra/Containerfile b/contrib/rootless-cni-infra/Containerfile
new file mode 100644
index 00000000000..4324f39d2bc
--- /dev/null
+++ b/contrib/rootless-cni-infra/Containerfile
@@ -0,0 +1,50 @@
+# Build arguments. An ARG declared before the first FROM is only visible in
+# FROM lines, so every stage re-declares the ones it uses.
+ARG GOLANG_VERSION=1.15
+ARG ALPINE_VERSION=3.12
+ARG CNI_VERSION=v0.8.0
+ARG CNI_PLUGINS_VERSION=v0.8.7
+ARG DNSNAME_VERSION=v1.1.1
+
+# Common Go toolchain image with git, shared by both build stages.
+FROM golang:${GOLANG_VERSION}-alpine${ALPINE_VERSION} AS golang-base
+RUN apk add --no-cache git
+
+# Build cnitool from the containernetworking/cni sources at CNI_VERSION.
+FROM golang-base AS cnitool
+ARG CNI_VERSION
+WORKDIR /go/src/github.com/containernetworking/cni
+# Clone and check out in a single layer so that changing CNI_VERSION never
+# reuses a cached clone that predates the requested revision.
+RUN git clone https://github.com/containernetworking/cni . && \
+    git checkout "${CNI_VERSION}"
+RUN go build -o /cnitool ./cnitool
+
+# Build the dnsname CNI plugin from the containers/dnsname sources at DNSNAME_VERSION.
+FROM golang-base AS dnsname
+ARG DNSNAME_VERSION
+WORKDIR /go/src/github.com/containers/dnsname
+# Single-layer clone + checkout, for the same cache reason as in the cnitool stage.
+RUN git clone https://github.com/containers/dnsname . && \
+    git checkout "${DNSNAME_VERSION}"
+RUN go build -o /dnsname ./plugins/meta/dnsname
+
+# Runtime image: CNI plugins, cnitool, dnsname and the rootless-cni-infra script.
+FROM alpine:${ALPINE_VERSION}
+RUN apk add --no-cache curl dnsmasq iptables ip6tables iproute2
+ARG TARGETARCH
+ARG CNI_PLUGINS_VERSION
+# Download to a file instead of piping curl into tar: without `pipefail` a
+# failed download inside a pipeline does not fail the RUN step by itself.
+RUN mkdir -p /opt/cni/bin && \
+    curl -fsSL -o /tmp/cni-plugins.tgz https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH}-${CNI_PLUGINS_VERSION}.tgz && \
+    tar xzf /tmp/cni-plugins.tgz -C /opt/cni/bin && \
+    rm -f /tmp/cni-plugins.tgz
+COPY --from=cnitool /cnitool /usr/local/bin
+COPY --from=dnsname /dnsname /opt/cni/bin
+COPY rootless-cni-infra /usr/local/bin
+ENV CNI_PATH=/opt/cni/bin
+CMD ["sleep", "infinity"]
+
+# Image version; must be increased whenever this directory changes.
+ENV ROOTLESS_CNI_INFRA_VERSION=5
diff --git a/contrib/rootless-cni-infra/README.md b/contrib/rootless-cni-infra/README.md
new file mode 100644
index 00000000000..f30d146c3ad
--- /dev/null
+++ b/contrib/rootless-cni-infra/README.md
@@ -0,0 +1,27 @@
+# rootless-cni-infra
+
+Infra container for CNI-in-slirp4netns.
+
+## *NOTE*: The following is only required for Podman version 3.0 and earlier.
+
+## How it works
+
+When a CNI network is specified for `podman run` in rootless mode, Podman launches the `rootless-cni-infra` container to execute CNI plugins inside slirp4netns.
+
+The infra container is created per user, by executing an equivalent of:
+`podman run -d --name rootless-cni-infra --pid=host --privileged -v $HOME/.config/cni/net.d:/etc/cni/net.d rootless-cni-infra`.
+The infra container is automatically deleted when no CNI network is in use.
+
+Podman then allocates a CNI netns in the infra container, by executing an equivalent of:
+`podman exec rootless-cni-infra rootless-cni-infra alloc $CONTAINER_ID $NETWORK_NAME $POD_NAME $IP $MAC $CAP_ARGS`.
+
+The allocated netns is deallocated when the container is being removed, by executing an equivalent of:
+`podman exec rootless-cni-infra rootless-cni-infra dealloc $CONTAINER_ID $NETWORK_NAME`.
+
+The container images live on `quay.io/libpod/rootless-cni-infra`. The tags have the format `$version-$architecture`. Please make sure to increase the version number in the Containerfile (i.e., `ROOTLESS_CNI_INFRA_VERSION`) when applying changes to this directory. After committing the changes, upload the image(s) with the corresponding tag.
+ +## Directory layout + +* `/run/rootless-cni-infra/${CONTAINER_ID}/pid`: PID of the `sleep infinity` process that corresponds to the allocated netns +* `/run/rootless-cni-infra/${CONTAINER_ID}/attached/${NETWORK_NAME}`: CNI result +* `/run/rootless-cni-infra/${CONTAINER_ID}/attached-args/${NETWORK_NAME}`: CNI args diff --git a/contrib/rootless-cni-infra/rootless-cni-infra b/contrib/rootless-cni-infra/rootless-cni-infra new file mode 100644 index 00000000000..cceb8d817a6 --- /dev/null +++ b/contrib/rootless-cni-infra/rootless-cni-infra @@ -0,0 +1,181 @@ +#!/bin/sh +set -eu + +ARG0="$0" +BASE="/run/rootless-cni-infra" + +wait_unshare_net() { + pid="$1" + # NOTE: busybox shell doesn't support the `for ((i=0; i < $MAX; i++)); do foo; done` statement + i=0 + while :; do + if [ "$(readlink /proc/self/ns/net)" != "$(readlink /proc/${pid}/ns/net)" ]; then + break + fi + sleep 0.1 + if [ $i -ge 10 ]; then + echo >&2 "/proc/${pid}/ns/net cannot be unshared" + exit 1 + fi + i=$((i + 1)) + done +} + +# CLI subcommand: "alloc $CONTAINER_ID $NETWORK_NAME $POD_NAME $IP $MAC $CAP_ARGS" +cmd_entrypoint_alloc() { + if [ "$#" -ne 6 ]; then + echo >&2 "Usage: $ARG0 alloc CONTAINER_ID NETWORK_NAME POD_NAME IP MAC CAP_ARGS" + exit 1 + fi + + ID="$1" + NET="$2" + K8S_POD_NAME="$3" + IP="$4" + MAC="$5" + CAP_ARGS="$6" + + dir="${BASE}/${ID}" + mkdir -p "${dir}/attached" "${dir}/attached-args" + + pid="" + if [ -f "${dir}/pid" ]; then + pid=$(cat "${dir}/pid") + else + unshare -n sleep infinity & + pid="$!" 
+ wait_unshare_net "${pid}" + echo "${pid}" >"${dir}/pid" + nsenter -t "${pid}" -n ip link set lo up + fi + CNI_ARGS="IgnoreUnknown=1;K8S_POD_NAME=${K8S_POD_NAME}" + if [ "$IP" ]; then + CNI_ARGS="$CNI_ARGS;IP=${IP}" + fi + if [ "$MAC" ]; then + CNI_ARGS="$CNI_ARGS;MAC=${MAC}" + fi + if [ "$CAP_ARGS" ]; then + CAP_ARGS="$CAP_ARGS" + fi + nwcount=$(find "${dir}/attached" -type f | wc -l) + CNI_IFNAME="eth${nwcount}" + export CNI_ARGS CNI_IFNAME CAP_ARGS + cnitool add "${NET}" "/proc/${pid}/ns/net" >"${dir}/attached/${NET}" + echo "${CNI_ARGS}" >"${dir}/attached-args/${NET}" + + # return the result + ns="/proc/${pid}/ns/net" + echo "{\"ns\":\"${ns}\"}" +} + +# CLI subcommand: "dealloc $CONTAINER_ID $NETWORK_NAME" +cmd_entrypoint_dealloc() { + if [ "$#" -ne 2 ]; then + echo >&2 "Usage: $ARG0 dealloc CONTAINER_ID NETWORK_NAME" + exit 1 + fi + + ID=$1 + NET=$2 + + dir="${BASE}/${ID}" + if [ ! -f "${dir}/pid" ]; then + exit 0 + fi + pid=$(cat "${dir}/pid") + if [ -f "${dir}/attached-args/${NET}" ]; then + CNI_ARGS=$(cat "${dir}/attached-args/${NET}") + export CNI_ARGS + fi + cnitool del "${NET}" "/proc/${pid}/ns/net" + rm -f "${dir}/attached/${NET}" "${dir}/attached-args/${NET}" + + nwcount=$(find "${dir}/attached" -type f | wc -l) + if [ "${nwcount}" = 0 ]; then + kill -9 "${pid}" + rm -rf "${dir}" + fi + + # return empty json + echo "{}" +} + +# CLI subcommand: "is-idle" +cmd_entrypoint_is_idle() { + if [ ! 
-d ${BASE} ]; then + echo '{"idle": true}' + elif [ -z "$(ls -1 ${BASE})" ]; then + echo '{"idle": true}' + else + echo '{"idle": false}' + fi +} + +# CLI subcommand: "print-cni-result $CONTAINER_ID $NETWORK_NAME" +cmd_entrypoint_print_cni_result() { + if [ "$#" -ne 2 ]; then + echo >&2 "Usage: $ARG0 print-cni-result CONTAINER_ID NETWORK_NAME" + exit 1 + fi + + ID=$1 + NET=$2 + + # the result shall be CNI JSON + cat "${BASE}/${ID}/attached/${NET}" +} + +# CLI subcommand: "print-netns-path $CONTAINER_ID" +cmd_entrypoint_print_netns_path() { + if [ "$#" -ne 1 ]; then + echo >&2 "Usage: $ARG0 print-netns-path CONTAINER_ID" + exit 1 + fi + + ID=$1 + + pid=$(cat "${BASE}/${ID}/pid") + path="/proc/${pid}/ns/net" + + # return the result + echo "{\"path\":\"${path}\"}" +} + +# CLI subcommand: "help" +cmd_entrypoint_help() { + echo "Usage: ${ARG0} COMMAND" + echo + echo "Rootless CNI Infra container" + echo + echo "Commands:" + echo " alloc Allocate a netns" + echo " dealloc Deallocate a netns" + echo " is-idle Print whether the infra container is idle" + echo " print-cni-result Print CNI result" + echo " print-netns-path Print netns path" + echo " help Print help" + echo " version Print version" +} + +# CLI subcommand: "version" +cmd_entrypoint_version() { + echo "{\"version\": \"${ROOTLESS_CNI_INFRA_VERSION}\"}" +} + +# parse args +command="${1:-}" +if [ -z "$command" ]; then + echo >&2 "No command was specified. Run \`${ARG0} help\` to see the usage." + exit 1 +fi + +command_func=$(echo "cmd_entrypoint_${command}" | sed -e "s/-/_/g") +if ! command -v "${command_func}" >/dev/null 2>&1; then + echo >&2 "Unknown command: ${command}. Run \`${ARG0} help\` to see the usage." + exit 1 +fi + +# start the command func +shift +"${command_func}" "$@"