diff --git a/Makefile b/Makefile index 90a723d753..e18e9a07d0 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ endif REPO_ROOT = $(shell git rev-parse --show-toplevel) REVISION ?= $(shell git rev-parse --short HEAD) -ACN_VERSION ?= $(shell git describe --exclude "azure-ipam*" --exclude "dropgz*" --exclude "zapai*" --tags --always) +ACN_VERSION ?= $(shell git describe --exclude "azure-ipam*" --exclude "dropgz*" --exclude "ipv6-hp-bpf*" --exclude "zapai*" --tags --always) +IPV6_HP_BPF_VERSION ?= $(notdir $(shell git describe --match "ipv6-hp-bpf*" --tags --always)) AZURE_IPAM_VERSION ?= $(notdir $(shell git describe --match "azure-ipam*" --tags --always)) CNI_VERSION ?= $(ACN_VERSION) CNI_DROPGZ_VERSION ?= $(notdir $(shell git describe --match "dropgz*" --tags --always)) @@ -44,6 +45,7 @@ ZAPAI_VERSION ?= $(notdir $(shell git describe --match "zapai*" --tags --alway # Build directories. AZURE_IPAM_DIR = $(REPO_ROOT)/azure-ipam +IPV6_HP_BPF_DIR = $(REPO_ROOT)/bpf-prog/ipv6-hp-bpf CNM_DIR = $(REPO_ROOT)/cnm/plugin CNI_NET_DIR = $(REPO_ROOT)/cni/network/plugin CNI_IPAM_DIR = $(REPO_ROOT)/cni/ipam/plugin @@ -56,6 +58,7 @@ NPM_DIR = $(REPO_ROOT)/npm/cmd OUTPUT_DIR = $(REPO_ROOT)/output BUILD_DIR = $(OUTPUT_DIR)/$(GOOS)_$(GOARCH) AZURE_IPAM_BUILD_DIR = $(BUILD_DIR)/azure-ipam +IPV6_HP_BPF_BUILD_DIR = $(BUILD_DIR)/bpf-prog/ipv6-hp-bpf IMAGE_DIR = $(OUTPUT_DIR)/images CNM_BUILD_DIR = $(BUILD_DIR)/cnm CNI_BUILD_DIR = $(BUILD_DIR)/cni @@ -139,6 +142,7 @@ azure-cns: azure-cns-binary cns-archive acncli: acncli-binary acncli-archive azure-npm: azure-npm-binary npm-archive azure-ipam: azure-ipam-binary azure-ipam-archive +ipv6-hp-bpf: ipv6-hp-bpf-binary ipv6-hp-bpf-archive ##@ Versioning @@ -154,6 +158,9 @@ acncli-version: version azure-ipam-version: ## prints the azure-ipam version @echo $(AZURE_IPAM_VERSION) +ipv6-hp-bpf-version: ## prints the ipv6-hp-bpf version + @echo $(IPV6_HP_BPF_VERSION) + cni-version: ## prints the cni version @echo $(CNI_VERSION) @@ -175,6 +182,11 @@ zapai-version: ## prints the zapai version azure-ipam-binary: cd $(AZURE_IPAM_DIR) && CGO_ENABLED=0 go build -v -o 
$(AZURE_IPAM_BUILD_DIR)/azure-ipam$(EXE_EXT) -ldflags "-X github.com/Azure/azure-container-networking/azure-ipam/internal/buildinfo.Version=$(AZURE_IPAM_VERSION)" -gcflags="-dwarflocationlists=true" +# Build the ipv6-hp-bpf binary as $(IPV6_HP_BPF_BUILD_DIR)/ipv6-hp-bpf (go generate runs first); the archive rule expects the binary inside that directory. +ipv6-hp-bpf-binary: + cd $(IPV6_HP_BPF_DIR) && CGO_ENABLED=0 go generate ./... + cd $(IPV6_HP_BPF_DIR)/cmd/ipv6-hp-bpf && CGO_ENABLED=0 go build -v -o $(IPV6_HP_BPF_BUILD_DIR)/ipv6-hp-bpf$(EXE_EXT) -ldflags "-X main.version=$(IPV6_HP_BPF_VERSION)" -gcflags="-dwarflocationlists=true" + # Build the Azure CNM binary. cnm-binary: cd $(CNM_DIR) && CGO_ENABLED=0 go build -v -o $(CNM_BUILD_DIR)/azure-vnet-plugin$(EXE_EXT) -ldflags "-X main.version=$(ACN_VERSION)" -gcflags="-dwarflocationlists=true" @@ -252,6 +264,7 @@ endif ## Image name definitions. ACNCLI_IMAGE = acncli AZURE_IPAM_IMAGE = azure-ipam +IPV6_HP_BPF_IMAGE = ipv6-hp-bpf CNI_IMAGE = azure-cni CNI_DROPGZ_IMAGE = cni-dropgz CNS_IMAGE = azure-cns @@ -261,6 +274,7 @@ NPM_IMAGE = azure-npm ACNCLI_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(ACN_VERSION) AZURE_IPAM_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(AZURE_IPAM_VERSION) AZURE_IPAM_WINDOWS_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(AZURE_IPAM_VERSION)-$(OS_SKU_WIN) +IPV6_HP_BPF_IMAGE_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(IPV6_HP_BPF_VERSION) CNI_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNI_VERSION) CNI_WINDOWS_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNI_VERSION)-$(OS_SKU_WIN) CNI_DROPGZ_PLATFORM_TAG ?= $(subst /,-,$(PLATFORM))$(if $(OS_VERSION),-$(OS_VERSION),)-$(CNI_DROPGZ_VERSION) @@ -368,6 +382,34 @@ azure-ipam-image-pull: ## pull azure-ipam container image. IMAGE=$(AZURE_IPAM_IMAGE) \ TAG=$(AZURE_IPAM_PLATFORM_TAG) +# ipv6-hp-bpf + +ipv6-hp-bpf-image-name: # util target to print the ipv6-hp-bpf image name. 
+ @echo $(IPV6_HP_BPF_IMAGE) + +ipv6-hp-bpf-image-name-and-tag: # util target to print the ipv6-hp-bpf image name and tag. + @echo $(IMAGE_REGISTRY)/$(IPV6_HP_BPF_IMAGE):$(IPV6_HP_BPF_IMAGE_PLATFORM_TAG) + +ipv6-hp-bpf-image: ## build ipv6-hp-bpf container image. + $(MAKE) container \ + DOCKERFILE=bpf-prog/ipv6-hp-bpf/$(OS).Dockerfile \ + IMAGE=$(IPV6_HP_BPF_IMAGE) \ + EXTRA_BUILD_ARGS='--build-arg OS=$(OS) --build-arg ARCH=$(ARCH) --build-arg OS_VERSION=$(OS_VERSION) --build-arg DEBUG=$(DEBUG)' \ + PLATFORM=$(PLATFORM) \ + TAG=$(IPV6_HP_BPF_IMAGE_PLATFORM_TAG) \ + OS=$(OS) \ + ARCH=$(ARCH) \ + OS_VERSION=$(OS_VERSION) + +ipv6-hp-bpf-image-push: ## push ipv6-hp-bpf container image. + $(MAKE) container-push \ + IMAGE=$(IPV6_HP_BPF_IMAGE) \ + TAG=$(IPV6_HP_BPF_IMAGE_PLATFORM_TAG) + +ipv6-hp-bpf-image-pull: ## pull ipv6-hp-bpf container image. + $(MAKE) container-pull \ + IMAGE=$(IPV6_HP_BPF_IMAGE) \ + TAG=$(IPV6_HP_BPF_IMAGE_PLATFORM_TAG) # cni @@ -742,6 +784,15 @@ ifeq ($(GOOS),linux) cd $(AZURE_IPAM_BUILD_DIR) && $(ARCHIVE_CMD) $(AZURE_IPAM_ARCHIVE_NAME) azure-ipam$(EXE_EXT) endif +# File name of the archive written by ipv6-hp-bpf-archive. NOTE(review): assumes ARCHIVE_EXT is defined alongside ARCHIVE_CMD - confirm. +IPV6_HP_BPF_ARCHIVE_NAME ?= ipv6-hp-bpf-$(GOOS)-$(GOARCH)-$(IPV6_HP_BPF_VERSION).$(ARCHIVE_EXT) +# Create an ipv6-hp-bpf archive for the target platform (linux only). +.PHONY: ipv6-hp-bpf-archive +ipv6-hp-bpf-archive: ipv6-hp-bpf-binary +ifeq ($(GOOS),linux) + $(MKDIR) $(IPV6_HP_BPF_BUILD_DIR) + cd $(IPV6_HP_BPF_BUILD_DIR) && $(ARCHIVE_CMD) $(IPV6_HP_BPF_ARCHIVE_NAME) ipv6-hp-bpf$(EXE_EXT) +endif ##@ Utils diff --git a/bpf-prog/ipv6-hp-bpf/README b/bpf-prog/ipv6-hp-bpf/README new file mode 100644 index 0000000000..808a568413 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/README @@ -0,0 +1,59 @@ +# ipv6-hp-bpf + +`ipv6-hp-bpf` is a project that leverages eBPF (Extended Berkeley Packet Filter) technology for traffic control in Linux kernel. This is a POC to fix external load balancer services in cilium dualstack clusters. + +## Description + +The goal of this bpf program is to fix the issue described [here](https://github.com/cilium/cilium/issues/31326). It includes both egress and ingress TC programs. 
These programs are meant to replace the nftables rules, since those rules don't work on cilium clusters. +The egress bpf code rewrites the destination IPv6 address of the packet from global unicast to link local, and the ingress code rewrites the source IPv6 address from link local to global unicast. + +## Usage + +Follow the steps below to compile the program and install it onto your node: + +1. Use the make command to build the binary or follow the steps below. + ```bash + make ipv6-hp-bpf-binary + ``` + +2. Copy the new binary to your node(s). + +3. Remove the nftables rules for IPv6 with the following commands: + ```bash + nft delete chain ip6 azureSLBProbe postrouting + nft delete chain ip6 azureSLBProbe prerouting + nft -n list table ip6 azureSLBProbe + ``` + +4. Start the program with: + ```bash + ./ipv6-hp-bpf + ``` +5. Debugging logs can be seen on the node under `/sys/kernel/debug/tracing/trace_pipe` + +## Manual Compilation +For testing purposes you can compile the bpf program without Go and attach it to the interface yourself. This is how you would do it for egress: +```bash +clang -O2 -g -target bpf -c egress.c -o egress.o +``` + +This will generate the egress.o file, which you can copy over to your cluster's node. +To copy it to the node you need to create a node-shell instance: +```bash +kubectl cp egress.o nsenter-xxxxx: +``` + +Since this is for cilium clusters, cilium already creates a qdisc on eth0 of type clsact (which allows both ingress and egress filters to be attached). 
If cilium is not installed, you would have to create the qdisc on your own by doing the following: +```bash +tc qdisc add dev eth0 clsact +``` + +## Attach the filter +```bash +tc filter add dev eth0 egress prio 1 bpf da obj egress.o sec classifier +``` + +## Verify the filter is attached +```bash +tc filter show dev eth0 egress +``` \ No newline at end of file diff --git a/bpf-prog/ipv6-hp-bpf/cmd/ipv6-hp-bpf/main.go b/bpf-prog/ipv6-hp-bpf/cmd/ipv6-hp-bpf/main.go new file mode 100644 index 0000000000..d68b7d131e --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/cmd/ipv6-hp-bpf/main.go @@ -0,0 +1,110 @@ +package main + +import ( + "bytes" + "net" + "os/exec" + + "github.com/Azure/azure-container-networking/bpf-prog/ipv6-hp-bpf/pkg/egress" + "github.com/Azure/azure-container-networking/bpf-prog/ipv6-hp-bpf/pkg/ingress" + "github.com/vishvananda/netlink" + + "github.com/cilium/ebpf/rlimit" + "go.uber.org/zap" +) + +var logger *zap.Logger + +// version is set at link time via -ldflags "-X main.version=..."; it stays empty when built without that flag. +var version string + +// main deletes the azureSLBProbe ip6 nftables table if present, replaces the clsact qdisc on eth0 and attaches the egress and ingress eBPF filters to it. Failures are logged; unrecoverable ones end the run via return. +func main() { + // Set up logger + config := zap.NewProductionConfig() + config.OutputPaths = []string{"stdout", "/var/log/azure-ipv6-hp-bpf.log"} + var err error + logger, err = config.Build() + if err != nil { + // A nil logger would panic on first use, so retry with stdout as the only sink. + buildErr := err + config.OutputPaths = []string{"stdout"} + if logger, err = config.Build(); err != nil { + panic(err) + } + logger.Warn("Falling back to stdout-only logging", zap.Error(buildErr)) + } + logger.Info("Starting ipv6-hp-bpf", zap.String("version", version)) + + // Remove resource limits for kernels <5.11. 
+ if err := rlimit.RemoveMemlock(); err != nil { + logger.Error("Removing memlock", zap.Error(err)) + return + } + + // Check 'nft -n list tables ip6' to see if table exists + cmd := exec.Command("nft", "-n", "list", "tables", "ip6") + output, err := cmd.CombinedOutput() + if err != nil { + logger.Error("error running 'nft -n list tables ip6'", zap.Error(err), zap.String("output", string(output))) + return + } + + // if azureSLBProbe table exists, delete it + if bytes.Contains(output, []byte("azureSLBProbe")) { + cmd := exec.Command("nft", "delete", "table", "ip6", "azureSLBProbe") + err = cmd.Run() + if err != nil { + logger.Error("failed to run 'nft delete table ip6 azureSLBProbe'", zap.Error(err)) + return + } + } + + ifname := "eth0" + iface, err := net.InterfaceByName(ifname) + if err != nil { + logger.Error("Getting interface", zap.String("interface", ifname), zap.Error(err)) + return // iface is nil on error; reading iface.Index below would panic + } + logger.Info("Interface has index", zap.String("interface", ifname), zap.Int("index", iface.Index)) + + // Create a qdisc filter for traffic on the interface. + fq := &netlink.GenericQdisc{ + QdiscAttrs: netlink.QdiscAttrs{ + LinkIndex: iface.Index, + Handle: netlink.MakeHandle(0xffff, 0), + Parent: netlink.HANDLE_CLSACT, + }, + QdiscType: "clsact", + } + if err := netlink.QdiscReplace(fq); err != nil { + logger.Error("failed setting egress qdisc", zap.Error(err)) + return + } + + // Load the compiled eBPF ELF and load it into the kernel. 
+ // Set up ingress and egress filters to attach to eth0 clsact qdisc + var objsEgress egress.EgressObjects + defer objsEgress.Close() + if err := egress.LoadEgressObjects(&objsEgress, nil); err != nil { + logger.Error("Failed to load eBPF egress objects", zap.Error(err)) + return // no program was loaded, so SetupEgressFilter has nothing to attach + } + if err := egress.SetupEgressFilter(iface.Index, &objsEgress, logger); err != nil { + logger.Error("Setting up egress filter", zap.Error(err)) + } else { + logger.Info("Successfully set egress filter on", zap.String("interface", ifname)) + } + + var objsIngress ingress.IngressObjects + if err := ingress.LoadIngressObjects(&objsIngress, nil); err != nil { + logger.Error("Loading eBPF ingress objects", zap.Error(err)) + return // no program was loaded, so SetupIngressFilter has nothing to attach + } + defer objsIngress.Close() + if err := ingress.SetupIngressFilter(iface.Index, &objsIngress, logger); err != nil { + logger.Error("Setting up ingress filter", zap.Error(err)) + } else { + logger.Info("Successfully set ingress filter on", zap.String("interface", ifname)) + } +} diff --git a/bpf-prog/ipv6-hp-bpf/go.mod b/bpf-prog/ipv6-hp-bpf/go.mod new file mode 100644 index 0000000000..e6dd75a0d6 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/go.mod @@ -0,0 +1,16 @@ +module github.com/Azure/azure-container-networking/bpf-prog/ipv6-hp-bpf + +go 1.21.6 + +require ( + github.com/cilium/ebpf v0.15.0 + github.com/vishvananda/netlink v1.1.0 + go.uber.org/zap v1.27.0 +) + +require ( + github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect + go.uber.org/multierr v1.10.0 // indirect + golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect + golang.org/x/sys v0.15.0 // indirect +) diff --git a/bpf-prog/ipv6-hp-bpf/go.sum b/bpf-prog/ipv6-hp-bpf/go.sum new file mode 100644 index 0000000000..00bc04b807 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/go.sum @@ -0,0 +1,35 @@ +github.com/cilium/ebpf v0.15.0 h1:7NxJhNiBT3NG8pZJ3c+yfrVdHY8ScgKD27sScgjLMMk= +github.com/cilium/ebpf v0.15.0/go.mod h1:DHp1WyrLeiBh19Cf/tfiSMhqheEiK8fXFZ4No0P1Hso= +github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI= +github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= +github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= +github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= +github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= 
+go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= +golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/bpf-prog/ipv6-hp-bpf/include/helper.h b/bpf-prog/ipv6-hp-bpf/include/helper.h new file mode 100644 index 0000000000..885e19d397 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/include/helper.h @@ -0,0 +1,18 @@ +#include +#include + +#define L4_HDR_OFF (ETH_HLEN + sizeof(struct ipv6hdr)) +#define BPF_F_PSEUDO_HDR (1ULL << 4) + +static __always_inline bool compare_ipv6_addr(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ +#pragma unroll + for (int i = 0; i < sizeof(struct in6_addr); i++) + { + if (addr1->s6_addr[i] != addr2->s6_addr[i]) + { + return false; + } + } + return true; +} \ No newline at end of file diff --git a/bpf-prog/ipv6-hp-bpf/linux.Dockerfile b/bpf-prog/ipv6-hp-bpf/linux.Dockerfile new file mode 100644 index 0000000000..3a21f8d05f --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/linux.Dockerfile @@ -0,0 +1,32 @@ +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.21 AS builder +ARG VERSION +ARG DEBUG +ARG OS +WORKDIR /bpf-prog/ipv6-hp-bpf +COPY ./bpf-prog/ipv6-hp-bpf . 
+COPY ./bpf-prog/ipv6-hp-bpf/cmd/ipv6-hp-bpf/*.go /bpf-prog/ipv6-hp-bpf/ +COPY ./bpf-prog/ipv6-hp-bpf/include/helper.h /bpf-prog/ipv6-hp-bpf/include/helper.h +RUN apt-get update && apt-get install -y llvm clang linux-libc-dev linux-headers-generic libbpf-dev libc6-dev gcc-multilib nftables iproute2 +RUN for dir in /usr/include/x86_64-linux-gnu/*; do ln -s "$dir" /usr/include/$(basename "$dir"); done +ENV C_INCLUDE_PATH=/usr/include/bpf +RUN if [ "$DEBUG" = "true" ]; then echo "\n#define DEBUG" >> /bpf-prog/ipv6-hp-bpf/include/helper.h; fi +RUN GOOS=$OS CGO_ENABLED=0 go generate ./... +RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/ipv6-hp-bpf -trimpath -ldflags "-X main.version=$VERSION" -gcflags="-dwarflocationlists=true" . + +FROM mcr.microsoft.com/cbl-mariner/distroless/minimal:2.0 +COPY --from=builder /go/bin/ipv6-hp-bpf /ipv6-hp-bpf +COPY --from=builder /usr/sbin/nft /usr/sbin/nft +COPY --from=builder /sbin/ip /sbin/ip +COPY --from=builder /lib/x86_64-linux-gnu/libnftables.so.1 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libedit.so.2 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libmnl.so.0 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libnftnl.so.11 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libxtables.so.12 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libjansson.so.4 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libgmp.so.10 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libtinfo.so.6 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib/x86_64-linux-gnu/libbsd.so.0 /lib/x86_64-linux-gnu/ +COPY --from=builder /lib64/ld-linux-x86-64.so.2 /lib64/ +COPY --from=builder /lib/x86_64-linux-gnu/libmd.so.0 /lib/x86_64-linux-gnu/ +CMD ["/ipv6-hp-bpf"] \ No newline at end of file diff --git a/bpf-prog/ipv6-hp-bpf/pkg/egress/bpf/egress.c 
b/bpf-prog/ipv6-hp-bpf/pkg/egress/bpf/egress.c new file mode 100644 index 0000000000..eafd40d59e --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/pkg/egress/bpf/egress.c @@ -0,0 +1,82 @@ +// go:build ignore +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../include/helper.h" + +SEC("classifier") +int gua_to_linklocal(struct __sk_buff *skb) +{ + // Define the link-local address fe80::1234:5678:9abc + const struct in6_addr LINKLOCAL_ADDR = {{{0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc}}}; + + // Define the global unicast address 2603:1062:0000:0001:fe80:1234:5678:9abc + const struct in6_addr GLOBAL_UNICAST_ADDR = {{{0x26, 0x03, 0x10, 0x62, 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc}}}; + + struct in6_addr dst_addr; + struct ipv6hdr ipv6_hdr; + + // Load the IPv6 header from the packet + int ret_hdr = bpf_skb_load_bytes(skb, ETH_HLEN, &ipv6_hdr, sizeof(ipv6_hdr)); + if (ret_hdr != 0) + { + bpf_printk("bpf_skb_load_bytes failed to load IPv6 header with error code %d.\n", ret_hdr); + return TC_ACT_UNSPEC; + } + + // Check if the packet is TCP + if (ipv6_hdr.nexthdr != IPPROTO_TCP) + return TC_ACT_UNSPEC; + + // Load the destination address from the packet + int ret = bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr), &dst_addr, sizeof(dst_addr)); + if (ret != 0) + { + bpf_printk("bpf_skb_load_bytes failed to load destination address with error code %d.\n", ret); + return TC_ACT_UNSPEC; + } + + // Check the destination address to determine if it is a global unicast address + if (compare_ipv6_addr(&dst_addr, &GLOBAL_UNICAST_ADDR)) + { + +#ifdef DEBUG + bpf_printk("Destination address is a global unicast address. 
Setting new addr to link local.\n"); + bpf_printk("Destination address is %pI6.\n", &dst_addr); +#endif + + // Store the new destination address in the packet + int ret = bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr), + &LINKLOCAL_ADDR, sizeof(LINKLOCAL_ADDR), 0); + if (ret != 0) + { + bpf_printk("bpf_skb_store_bytes failed to store new destination address with error code %d.\n", ret); + return TC_ACT_SHOT; + } + + // Update the checksum + __be32 sum = bpf_csum_diff((__be32 *)GLOBAL_UNICAST_ADDR.s6_addr32, sizeof(GLOBAL_UNICAST_ADDR), + (__be32 *)LINKLOCAL_ADDR.s6_addr32, sizeof(LINKLOCAL_ADDR), 0); + + int offset = offsetof(struct tcphdr, check); + + ret = bpf_l4_csum_replace(skb, L4_HDR_OFF + offset, 0, sum, BPF_F_PSEUDO_HDR); + if (ret < 0) + { + bpf_printk("csum_l4_replace failed to update checksum: %d", ret); + return TC_ACT_SHOT; + } + } + + return TC_ACT_UNSPEC; +} + +char __license[] SEC("license") = "Dual MIT/GPL"; diff --git a/bpf-prog/ipv6-hp-bpf/pkg/egress/egress.go b/bpf-prog/ipv6-hp-bpf/pkg/egress/egress.go new file mode 100644 index 0000000000..b8de9fcb32 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/pkg/egress/egress.go @@ -0,0 +1,57 @@ +package egress + +import ( + "syscall" + + "github.com/vishvananda/netlink" + "go.uber.org/zap" +) + +// SetupEgressFilter sets up the egress filter +func SetupEgressFilter(ifaceIndex int, objs *EgressObjects, logger *zap.Logger) error { + link, err := netlink.LinkByIndex(ifaceIndex) + if err != nil { + logger.Error("Failed to get link", zap.Error(err)) + return err + } + + // Get the list of filters on the link + filters, err := netlink.FilterList(link, netlink.HANDLE_MIN_EGRESS) + if err != nil { + logger.Error("Failed to get filter list", zap.Error(err)) + return err + } + + // Check if egress filter exists and delete it. Filter is identified by its name. 
+ // this is to avoid duplicate filters after restarting the daemonset + for _, filter := range filters { + if filter, ok := filter.(*netlink.BpfFilter); ok && filter.Name == "ipv6_hp_egress" { + if err := netlink.FilterDel(filter); err != nil { + logger.Error("Failed to delete filter", zap.Error(err)) + return err + } + break + } + } + + egressFilter := &netlink.BpfFilter{ + FilterAttrs: netlink.FilterAttrs{ + LinkIndex: ifaceIndex, + Parent: netlink.HANDLE_MIN_EGRESS, + Protocol: syscall.ETH_P_ALL, + Priority: 1, + }, + Fd: objs.GuaToLinklocal.FD(), + Name: "ipv6_hp_egress", + DirectAction: true, + } + + if err := netlink.FilterReplace(egressFilter); err != nil { + logger.Error("failed setting egress filter", zap.Error(err)) + return err + } else { + logger.Info("Successfully set egress filter on", zap.Int("ifaceIndex", ifaceIndex)) + } + + return nil +} diff --git a/bpf-prog/ipv6-hp-bpf/pkg/egress/gen.go b/bpf-prog/ipv6-hp-bpf/pkg/egress/gen.go new file mode 100644 index 0000000000..485fa769f9 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/pkg/egress/gen.go @@ -0,0 +1,3 @@ +package egress + +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel,bpfeb -go-package egress Egress ./bpf/egress.c -- -I./bpf/include diff --git a/bpf-prog/ipv6-hp-bpf/pkg/ingress/bpf/ingress.c b/bpf-prog/ipv6-hp-bpf/pkg/ingress/bpf/ingress.c new file mode 100644 index 0000000000..84b0cce63a --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/pkg/ingress/bpf/ingress.c @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../include/helper.h" + +SEC("classifier") +int linklocal_to_gua(struct __sk_buff *skb) +{ + // Define the global unicast address 2603:1062:0000:0001:fe80:1234:5678:9abc + const struct in6_addr GLOBAL_UNICAST_ADDR = {{{0x26, 0x03, 0x10, 0x62, 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc}}}; + // Define the link-local address fe80::1234:5678:9abc + const struct 
in6_addr LINKLOCAL_ADDR = {{{0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc}}}; + + struct in6_addr src_addr; + struct ipv6hdr ipv6_hdr; + + // Load the IPv6 header from the packet + int ret_hdr = bpf_skb_load_bytes(skb, ETH_HLEN, &ipv6_hdr, sizeof(ipv6_hdr)); + if (ret_hdr != 0) + { + bpf_printk("bpf_skb_load_bytes failed to load IPv6 header with error code %d.\n", ret_hdr); + return TC_ACT_UNSPEC; + } + + // Check if the packet is TCP + if (ipv6_hdr.nexthdr != IPPROTO_TCP) + return TC_ACT_UNSPEC; + + // Load the source address from the packet + int ret = bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, saddr), &src_addr, sizeof(src_addr)); + if (ret != 0) + { + bpf_printk("bpf_skb_load_bytes failed to load source address with error code %d.\n", ret); + return TC_ACT_UNSPEC; + } + + // Check the source address to determine if it is Link Local + if (compare_ipv6_addr(&src_addr, &LINKLOCAL_ADDR)) + { + +#ifdef DEBUG + bpf_printk("Source address is a link local address. 
Setting new addr to global unicast.\n"); + bpf_printk("Source address is %pI6.\n", &src_addr); +#endif + + // Store the new source address in the packet + int ret = bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, saddr), + &GLOBAL_UNICAST_ADDR, sizeof(GLOBAL_UNICAST_ADDR), 0); + if (ret != 0) + { + bpf_printk("bpf_skb_store_bytes failed to store new source address with error code %d.\n", ret); + return TC_ACT_SHOT; + } + + // Update the checksum + __be32 sum = bpf_csum_diff((__be32 *)LINKLOCAL_ADDR.s6_addr32, sizeof(LINKLOCAL_ADDR.s6_addr32), + (__be32 *)GLOBAL_UNICAST_ADDR.s6_addr32, sizeof(GLOBAL_UNICAST_ADDR.s6_addr32), 0); + + int offset = offsetof(struct tcphdr, check); + + ret = bpf_l4_csum_replace(skb, L4_HDR_OFF + offset, 0, sum, BPF_F_PSEUDO_HDR); + if (ret < 0) + { + bpf_printk("csum_l4_replace failed to update checksum: %d", ret); + return TC_ACT_SHOT; + } + } + + return TC_ACT_UNSPEC; +} + +char __license[] SEC("license") = "Dual MIT/GPL"; diff --git a/bpf-prog/ipv6-hp-bpf/pkg/ingress/gen.go b/bpf-prog/ipv6-hp-bpf/pkg/ingress/gen.go new file mode 100644 index 0000000000..428386bc33 --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/pkg/ingress/gen.go @@ -0,0 +1,3 @@ +package ingress + +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel,bpfeb -go-package ingress Ingress ./bpf/ingress.c -- -I./bpf/include diff --git a/bpf-prog/ipv6-hp-bpf/pkg/ingress/ingress.go b/bpf-prog/ipv6-hp-bpf/pkg/ingress/ingress.go new file mode 100644 index 0000000000..556e5b9bfe --- /dev/null +++ b/bpf-prog/ipv6-hp-bpf/pkg/ingress/ingress.go @@ -0,0 +1,57 @@ +package ingress + +import ( + "syscall" + + "github.com/vishvananda/netlink" + "go.uber.org/zap" +) + +// SetupIngressFilter sets up the ingress filter +func SetupIngressFilter(ifaceIndex int, objs *IngressObjects, logger *zap.Logger) error { + link, err := netlink.LinkByIndex(ifaceIndex) + if err != nil { + logger.Error("Failed to get link", zap.Error(err)) + return err + } + + // Get the list of 
filters on the link + filters, err := netlink.FilterList(link, netlink.HANDLE_MIN_INGRESS) + if err != nil { + logger.Error("Failed to get filter list", zap.Error(err)) + return err + } + + // Check if ingress filter exists and delete it. Filter is identified by its name. + // this is to avoid duplicate filters after restarting the daemonset + for _, filter := range filters { + if filter, ok := filter.(*netlink.BpfFilter); ok && filter.Name == "ipv6_hp_ingress" { + if err := netlink.FilterDel(filter); err != nil { + logger.Error("Failed to delete filter", zap.Error(err)) + return err + } + break + } + } + + ingressFilter := &netlink.BpfFilter{ + FilterAttrs: netlink.FilterAttrs{ + LinkIndex: ifaceIndex, + Parent: netlink.HANDLE_MIN_INGRESS, + Protocol: syscall.ETH_P_ALL, + Priority: 1, + }, + Fd: objs.LinklocalToGua.FD(), + Name: "ipv6_hp_ingress", + DirectAction: true, + } + + if err := netlink.FilterReplace(ingressFilter); err != nil { + logger.Error("failed setting ingress filter", zap.Error(err)) + return err + } else { + logger.Info("Successfully set ingress filter on", zap.Int("ifaceIndex", ifaceIndex)) + } + + return nil +}