Skip to content

Commit

Permalink
[Backport] Install fib rules and routes with proto kernel to avoid sy…
Browse files Browse the repository at this point in the history
…stemd messing with them.

Can be dropped on v1.14+

Upstream-PR: cilium#25350

Bug: b/339297320

Change-Id: I340da52623eb5cbc6e571c2df7a48ced80eb0a42
Reviewed-on: https://gke-internal-review.googlesource.com/c/third_party/cilium/+/1016152
Unit-Verified: Prow_Bot_V2 <[email protected]>
Reviewed-by: Sugang Li <[email protected]>
Lint: Lint 🤖 <[email protected]>
Reviewed-by: Prow_Bot_V2 <[email protected]>
Tested-by: Prow_Bot_V2 <[email protected]>
Reviewed-by: Mark St. John <[email protected]>
  • Loading branch information
yasz24 committed May 8, 2024
1 parent c04b4c2 commit e40a942
Show file tree
Hide file tree
Showing 42 changed files with 855 additions and 172 deletions.
22 changes: 12 additions & 10 deletions bpf/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ NR_CPUS=${21}
ENDPOINT_ROUTES=${22}
PROXY_RULE=${23}
FILTER_PRIO=${24}
DEFAULT_RTPROTO=${25}
LOCAL_RULE_PRIO=${26}

ID_HOST=1
ID_WORLD=2
Expand Down Expand Up @@ -74,21 +76,21 @@ function move_local_rules_af()
return
fi

# move the local table lookup rule from pref 0 to pref 100 so we can
# insert the cilium ip rules before the local table. It is strictly
# move the local table lookup rule from pref 0 to pref LOCAL_RULE_PRIO so we
# can insert the cilium ip rules before the local table. It is strictly
# required to add the new local rule before deleting the old one as
# otherwise local addresses will not be reachable for a short period of
# time.
$IP rule list | grep 100 | grep "lookup local" || {
$IP rule add from all lookup local pref 100
$IP rule list | grep "${LOCAL_RULE_PRIO}" | grep "lookup local" || {
$IP rule add from all lookup local pref ${LOCAL_RULE_PRIO} proto $DEFAULT_RTPROTO
}
$IP rule del from all lookup local pref 0 2> /dev/null || true

# check if the move of the local table rule was successful and restore
# it otherwise
if [ "$($IP rule list | grep "lookup local" | wc -l)" -eq "0" ]; then
$IP rule add from all lookup local pref 0
$IP rule del from all lookup local pref 100
$IP rule add from all lookup local pref 0 proto $DEFAULT_RTPROTO
$IP rule del from all lookup local pref ${LOCAL_RULE_PRIO}
echo "Error: The kernel does not support moving the local table routing rule"
echo "Local routing rules:"
$IP rule list lookup local
Expand All @@ -110,13 +112,13 @@ function move_local_rules()
function setup_proxy_rules()
{
# TODO(brb): remove $PROXY_RT_TABLE -related code in v1.15
from_ingress_rulespec="fwmark 0xA00/0xF00 pref 10 lookup $PROXY_RT_TABLE"
from_ingress_rulespec="fwmark 0xA00/0xF00 pref 10 lookup $PROXY_RT_TABLE proto $DEFAULT_RTPROTO"

# Any packet to an ingress or egress proxy uses a separate routing table
# that routes the packet to the loopback device regardless of the destination
# address in the packet. For this to work the ctx must have a socket set
# (e.g., via TPROXY).
to_proxy_rulespec="fwmark 0x200/0xF00 pref 9 lookup $TO_PROXY_RT_TABLE"
to_proxy_rulespec="fwmark 0x200/0xF00 pref 9 lookup $TO_PROXY_RT_TABLE proto $DEFAULT_RTPROTO"

if [ "$IP4_HOST" != "<nil>" ]; then
if [ -n "$(ip -4 rule list)" ]; then
Expand All @@ -128,7 +130,7 @@ function setup_proxy_rules()
fi

# Traffic to the host proxy is local
ip route replace table $TO_PROXY_RT_TABLE local 0.0.0.0/0 dev lo
ip route replace table $TO_PROXY_RT_TABLE local 0.0.0.0/0 dev lo proto $DEFAULT_RTPROTO

# The $PROXY_RT_TABLE is no longer in use, so delete it
ip route delete table $PROXY_RT_TABLE $IP4_HOST/32 dev $HOST_DEV1 2>/dev/null || true
Expand Down Expand Up @@ -157,7 +159,7 @@ function setup_proxy_rules()
IP6_LLADDR=$(ip -6 addr show dev $HOST_DEV2 | grep inet6 | head -1 | awk '{print $2}' | awk -F'/' '{print $1}')
if [ -n "$IP6_LLADDR" ]; then
# Traffic to the host proxy is local
ip -6 route replace table $TO_PROXY_RT_TABLE local ::/0 dev lo
ip -6 route replace table $TO_PROXY_RT_TABLE local ::/0 dev lo proto $DEFAULT_RTPROTO
# Traffic from ingress proxy goes to Cilium address space via the cilium host device
if [ "$ENDPOINT_ROUTES" = "true" ]; then
ip -6 route delete table $PROXY_RT_TABLE ${IP6_LLADDR}/128 dev $HOST_DEV1 2>/dev/null || true
Expand Down
2 changes: 1 addition & 1 deletion bugtool/cmd/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func defaultCommands(confDir string, cmdDir string, k8sPods []string) []string {
"ip6tables-nft-save -c",
"iptables-legacy-save -c",
"ip6tables-legacy-save -c",
"ip rule",
"ip -d rule",
"ipset list",
// xfrm
"ip -s xfrm policy",
Expand Down
7 changes: 7 additions & 0 deletions daemon/cmd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sync/semaphore"
"golang.org/x/sys/unix"

"github.com/cilium/cilium/api/v1/models"
health "github.com/cilium/cilium/cilium-health/launch"
Expand Down Expand Up @@ -311,6 +312,12 @@ func (d *Daemon) init() error {
return fmt.Errorf("failed while reinitializing datapath: %w", err)
}

if err := linuxdatapath.NodeEnsureLocalIPRule(); errors.Is(err, unix.EEXIST) {
log.WithError(err).Warn("Failed to ensure local IP rules")
} else if err != nil {
return fmt.Errorf("failed to ensure local IP rules: %w", err)
}

if option.Config.SockopsEnable {
eppolicymap.CreateEPPolicyMap()
if err := sockops.SockmapEnable(); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion daemon/cmd/datapath.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ func setupRouteToVtepCidr() error {
To: prefix.IPNet,
Table: linux_defaults.RouteTableVtep,
}
if err := route.DeleteRule(rule); err != nil {
if err := route.DeleteRule(netlink.FAMILY_V4, rule); err != nil {
return fmt.Errorf("Delete VTEP CIDR rule error: %w", err)
}
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ require (
github.com/stretchr/testify v1.8.4
github.com/tidwall/gjson v1.14.4
github.com/tidwall/sjson v1.2.5
github.com/vishvananda/netlink v1.2.1-beta.2.0.20220608195807-1a118fe229fc
github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74
go.etcd.io/etcd/api/v3 v3.5.6
go.etcd.io/etcd/client/pkg/v3 v3.5.6
Expand Down
6 changes: 3 additions & 3 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 7 additions & 5 deletions pkg/datapath/linux/ipsec.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,17 +238,18 @@ func (n *linuxNodeHandler) removeEncryptRules() error {
Priority: 1,
Mask: linux_defaults.RouteMarkMask,
Table: linux_defaults.RouteTableIPSec,
Protocol: linux_defaults.RTProto,
}

rule.Mark = linux_defaults.RouteMarkDecrypt
if err := route.DeleteRule(rule); err != nil {
if err := route.DeleteRule(netlink.FAMILY_V4, rule); err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("Delete previous IPv4 decrypt rule failed: %s", err)
}
}

rule.Mark = linux_defaults.RouteMarkEncrypt
if err := route.DeleteRule(rule); err != nil {
if err := route.DeleteRule(netlink.FAMILY_V4, rule); err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("Delete previousa IPv4 encrypt rule failed: %s", err)
}
Expand All @@ -259,14 +260,14 @@ func (n *linuxNodeHandler) removeEncryptRules() error {
}

rule.Mark = linux_defaults.RouteMarkDecrypt
if err := route.DeleteRuleIPv6(rule); err != nil {
if err := route.DeleteRule(netlink.FAMILY_V6, rule); err != nil {
if !os.IsNotExist(err) && !errors.Is(err, unix.EAFNOSUPPORT) {
return fmt.Errorf("Delete previous IPv6 decrypt rule failed: %s", err)
}
}

rule.Mark = linux_defaults.RouteMarkEncrypt
if err := route.DeleteRuleIPv6(rule); err != nil {
if err := route.DeleteRule(netlink.FAMILY_V6, rule); err != nil {
if !os.IsNotExist(err) && !errors.Is(err, unix.EAFNOSUPPORT) {
return fmt.Errorf("Delete previous IPv6 encrypt rule failed: %s", err)
}
Expand All @@ -287,7 +288,7 @@ func (n *linuxNodeHandler) createNodeIPSecInRoute(ip *net.IPNet) route.Route {
Device: device,
Prefix: *ip,
Table: linux_defaults.RouteTableIPSec,
Proto: linux_defaults.RouteProtocolIPSec,
Proto: linux_defaults.RTProto,
Type: route.RTN_LOCAL,
}
}
Expand All @@ -299,6 +300,7 @@ func (n *linuxNodeHandler) createNodeIPSecOutRoute(ip *net.IPNet) route.Route {
Prefix: *ip,
Table: linux_defaults.RouteTableIPSec,
MTU: n.nodeConfig.MtuConfig.GetRoutePostEncryptMTU(),
Proto: linux_defaults.RTProto,
}
}

Expand Down
10 changes: 8 additions & 2 deletions pkg/datapath/linux/linux_defaults/linux_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package linux_defaults

import (
"time"

"golang.org/x/sys/unix"
)

// Linux specific constants used in Linux datapath
Expand Down Expand Up @@ -65,8 +67,8 @@ const (
// RouterMarkNodePort
MaskMultinodeNodeport = 0x80

// IPSecProtocolID IP protocol ID for IPSec defined in RFC4303
RouteProtocolIPSec = 50
// RTProto is the default protocol we install our fib rules and routes with
RTProto = unix.RTPROT_KERNEL

// RulePriorityWireguard is the priority of the rule used for routing packets to Wireguard device for encryption
RulePriorityWireguard = 1
Expand All @@ -82,6 +84,10 @@ const (
// before the local table priority.
RulePriorityIngress = 20

// RulePriorityLocalLookup is the priority for the local lookup rule which is
// moved on init from 0
RulePriorityLocalLookup = 100

// RulePriorityEgress is the priority of the rule used for egress routing
// of endpoints. This priority is after the local table priority.
RulePriorityEgress = 110
Expand Down
100 changes: 96 additions & 4 deletions pkg/datapath/linux/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"

"github.com/cilium/cilium/pkg/cidr"
"github.com/cilium/cilium/pkg/counter"
Expand Down Expand Up @@ -192,8 +193,9 @@ func createDirectRouteSpec(CIDR *cidr.CIDR, nodeIP net.IP) (routeSpec *netlink.R
var routes []netlink.Route

routeSpec = &netlink.Route{
Dst: CIDR.IPNet,
Gw: nodeIP,
Dst: CIDR.IPNet,
Gw: nodeIP,
Protocol: linux_defaults.RTProto,
}

routes, err = netlink.RouteGet(nodeIP)
Expand Down Expand Up @@ -317,8 +319,9 @@ func (n *linuxNodeHandler) deleteDirectRoute(CIDR *cidr.CIDR, nodeIP net.IP) {
}

filter := &netlink.Route{
Dst: CIDR.IPNet,
Gw: nodeIP,
Dst: CIDR.IPNet,
Gw: nodeIP,
Protocol: linux_defaults.RTProto,
}

routes, err := netlink.RouteListFiltered(family, filter, netlink.RT_FILTER_DST|netlink.RT_FILTER_GW)
Expand Down Expand Up @@ -386,6 +389,7 @@ func (n *linuxNodeHandler) createNodeRouteSpec(prefix *cidr.CIDR, isLocalNode bo
Prefix: *prefix.IPNet,
MTU: mtu,
Priority: option.Config.RouteMetric,
Proto: linux_defaults.RTProto,
}, nil
}

Expand Down Expand Up @@ -1042,6 +1046,7 @@ func (n *linuxNodeHandler) replaceHostRules() error {
Priority: 1,
Mask: linux_defaults.RouteMarkMask,
Table: linux_defaults.RouteTableIPSec,
Protocol: linux_defaults.RTProto,
}

if n.nodeConfig.EnableIPv4 {
Expand Down Expand Up @@ -1476,3 +1481,90 @@ func NodeDeviceNameWithDefaultRoute() (string, error) {
}
return link.Attrs().Name, nil
}

// deleteOldLocalRule removes a legacy local lookup rule that carries no
// routing protocol (unix.RTPROT_UNSPEC) once a rule carrying
// linux_defaults.RTProto is present after it.
//
// family is the netlink address family; netlink.FAMILY_V4 is labelled "IPv4"
// in errors and logs, any other value is labelled "IPv6". rule is used both as
// the filter passed to route.ListRules and as the rule handed to
// route.DeleteRule.
//
// The deletion only happens when, in the order returned by route.ListRules, an
// unspec-protocol rule appears before the first rule whose protocol is
// linux_defaults.RTProto. In every other case nothing is deleted and nil is
// returned. Listing and deletion failures are returned wrapped.
func deleteOldLocalRule(family int, rule route.Rule) error {
	// Sanity check: if the default protocol is itself unspec, the old and the
	// new rule are the same and there is nothing to clean up.
	if linux_defaults.RTProto == unix.RTPROT_UNSPEC {
		return nil
	}

	familyStr := "IPv6"
	if family == netlink.FAMILY_V4 {
		familyStr = "IPv4"
	}

	localRules, err := route.ListRules(family, &rule)
	if err != nil {
		return fmt.Errorf("could not list local %s rules: %w", familyStr, err)
	}

	// Scan up to the first rule installed with our protocol and remember
	// whether an unspec-protocol rule was seen before it. The old rule is only
	// removed when it precedes the new one.
	seenOld := false
	deleteOld := false
	for _, r := range localRules {
		if r.Protocol == unix.RTPROT_UNSPEC {
			seenOld = true
		}
		if r.Protocol == linux_defaults.RTProto {
			deleteOld = seenOld
			break
		}
	}
	if !deleteOld {
		return nil
	}

	if err := route.DeleteRule(family, rule); err != nil {
		return fmt.Errorf("could not delete old %s local rule: %w", familyStr, err)
	}
	log.WithFields(logrus.Fields{"family": familyStr}).Info("Deleting old local lookup rule")

	return nil
}

// NodeEnsureLocalIPRule makes sure the Cilium local lookup rule (priority
// linux_defaults.RulePriorityLocalLookup, usually 100) is installed with the
// linux_defaults.RTProto protocol, and asks deleteOldLocalRule to clean up the
// variant of that rule that has no protocol set.
//
// It does nothing unless both InstallIptRules and EnableL7Proxy are set. The
// IPv4 and IPv6 rules are handled independently, each gated on its own
// EnableIPv4 / EnableIPv6 option. The first failure is returned.
func NodeEnsureLocalIPRule() error {
	// We have the Cilium local lookup rule only if the proxy rule is present.
	if !(option.Config.InstallIptRules && option.Config.EnableL7Proxy) {
		return nil
	}

	wanted := route.Rule{
		Priority: linux_defaults.RulePriorityLocalLookup,
		Table:    unix.RT_TABLE_LOCAL,
		Mark:     -1,
		Mask:     -1,
		Protocol: linux_defaults.RTProto,
	}
	// Same rule, but without a protocol: this is what gets cleaned up.
	legacy := wanted
	legacy.Protocol = unix.RTPROT_UNSPEC

	if option.Config.EnableIPv4 {
		if err := route.ReplaceRule(wanted); err != nil {
			return fmt.Errorf("could not replace IPv4 local rule: %w", err)
		}
		if err := deleteOldLocalRule(netlink.FAMILY_V4, legacy); err != nil {
			return err
		}
	}

	if option.Config.EnableIPv6 {
		if err := route.ReplaceRuleIPv6(wanted); err != nil {
			return fmt.Errorf("could not replace IPv6 local rule: %w", err)
		}
		if err := deleteOldLocalRule(netlink.FAMILY_V6, legacy); err != nil {
			return err
		}
	}

	return nil
}
Loading

0 comments on commit e40a942

Please sign in to comment.