Skip to content

Commit

Permalink
ipv6: Plumb support for nexthop object in a fib6_info
Browse files Browse the repository at this point in the history
Add struct nexthop and nh_list list_head to fib6_info. nh_list is the
fib6_info side of the nexthop <-> fib6_info relationship. Since a fib6_info
referencing a nexthop object can not have 'sibling' entries (the old way
of doing multipath routes), the nh_list is a union with fib6_siblings.

Add f6i_list list_head to 'struct nexthop' to track fib6_info entries
using a nexthop instance. Update __remove_nexthop_fib to walk f6i_list
and delete fib entries using the nexthop.

Add a few nexthop helpers for use when a nexthop is added to fib6_info:
- nexthop_fib6_nh - return first fib6_nh in a nexthop object
- fib6_info_nh_dev moved to nexthop.h and updated to use nexthop_fib6_nh
  if the fib6_info references a nexthop object
- nexthop_path_fib6_result - similar to ipv4, select a path within a
  multipath nexthop object. If the nexthop is a blackhole, set
  fib6_result type to RTN_BLACKHOLE, and set the REJECT flag

Update the fib6_info references to check for nh and take a different path
as needed:
- rt6_qualify_for_ecmp - if a fib entry uses a nexthop object it can NOT
  be coalesced with other fib entries into a multipath route
- rt6_duplicate_nexthop - use nexthop_cmp if either fib6_info references
  a nexthop
- addrconf (host routes), RA's and info entries (anything configured via
  ndisc) do not use nexthop objects
- fib6_info_destroy_rcu - put reference to nexthop object
- fib6_purge_rt - drop fib6_info from f6i_list
- fib6_select_path - update to use the new nexthop_path_fib6_result when
  fib entry uses a nexthop object
- rt6_device_match - update to catch use of nexthop object as a blackhole
  and set fib6_type and flags.
- ip6_route_info_create - don't add space for fib6_nh if fib entry is
  going to reference a nexthop object, take a reference to nexthop object,
  disallow use of source routing
- rt6_nlmsg_size - add space for RTA_NH_ID
- add rt6_fill_node_nexthop to add nexthop data on a dump

As with ipv4, most of the changes push existing code into the else branch
of whether the fib entry uses a nexthop object.

Update the nexthop code to walk f6i_list when a nexthop is deleted and
remove the fib entries referencing it.

Signed-off-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
dsahern authored and davem330 committed Jun 5, 2019
1 parent 4c7e808 commit f88d8ea
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 36 deletions.
11 changes: 5 additions & 6 deletions include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ struct fib6_info {
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
struct list_head fib6_siblings;
union {
struct list_head fib6_siblings;
struct list_head nh_list;
};
unsigned int fib6_nsiblings;

refcount_t fib6_ref;
Expand All @@ -170,6 +173,7 @@ struct fib6_info {
unused:3;

struct rcu_head rcu;
struct nexthop *nh;
struct fib6_nh fib6_nh[0];
};

Expand Down Expand Up @@ -441,11 +445,6 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
rcu_read_unlock();
}

static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
{
return f6i->fib6_nh->fib_nh_dev;
}

int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
Expand Down
13 changes: 11 additions & 2 deletions include/net/ip6_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct route_info {
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/route.h>
#include <net/nexthop.h>

#define RT6_LOOKUP_F_IFACE 0x00000001
#define RT6_LOOKUP_F_REACHABLE 0x00000002
Expand Down Expand Up @@ -66,10 +67,13 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

/* fib entries using a nexthop object can not be coalesced into
* a multipath route
*/
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
/* the RTF_ADDRCONF flag filters out RA's */
return !(f6i->fib6_flags & RTF_ADDRCONF) &&
return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh &&
f6i->fib6_nh->fib_nh_gw_family;
}

Expand Down Expand Up @@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,

static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
struct fib6_nh *nha, *nhb;

if (a->nh || b->nh)
return nexthop_cmp(a->nh, b->nh);

nha = a->fib6_nh;
nhb = b->fib6_nh;
return nha->fib_nh_dev == nhb->fib_nh_dev &&
ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
Expand Down
50 changes: 50 additions & 0 deletions include/net/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H

#include <linux/netdevice.h>
#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
Expand Down Expand Up @@ -78,6 +79,7 @@ struct nh_group {
struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
struct list_head fi_list; /* v4 entries using nh */
struct list_head f6i_list; /* v6 entries using nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;

Expand Down Expand Up @@ -255,4 +257,52 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)

return &fi->fib_nh[nhsel];
}

/*
* IPv6 variants
*/
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack);

/* nexthop_fib6_nh - return the fib6_nh held by a nexthop object, or NULL
*
* When nexthop_is_multipath() is true for @nh, the entry returned by
* nexthop_mpath_select(nh, 0) is used in place of @nh; NULL is returned
* if that selection yields nothing.
*
* NULL is also returned when the nexthop's nh_info family is not AF_INET6,
* so callers must be prepared for a NULL result.
*
* nh_info is read via rcu_dereference_rtnl(); presumably the caller must
* be inside an RCU read-side section or hold RTNL -- confirm at call sites.
*/
static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
{
struct nh_info *nhi;

if (nexthop_is_multipath(nh)) {
nh = nexthop_mpath_select(nh, 0);
if (!nh)
return NULL;
}

nhi = rcu_dereference_rtnl(nh->nh_info);
if (nhi->family == AF_INET6)
return &nhi->fib6_nh;

return NULL;
}

/* fib6_info_nh_dev - return the net_device (fib_nh_dev) used by a fib entry
*
* The fib6_nh is taken from the nexthop object when f6i->nh is set, and
* from the builtin f6i->fib6_nh otherwise.
*
* NOTE(review): nexthop_fib6_nh() can return NULL (non-AF_INET6 nexthop or
* failed multipath selection) and the result is dereferenced here without
* a check -- confirm that every nexthop attached to a fib6_info is
* guaranteed to resolve to an IPv6 fib6_nh.
*/
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
struct fib6_nh *fib6_nh;

fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
return fib6_nh->fib_nh_dev;
}

/* nexthop_path_fib6_result - select a path in the nexthop object of a result
* @res: lookup result; res->f6i->nh is read and res->nh is always written.
*       For a reject nexthop, res->fib6_type and res->fib6_flags are
*       updated as well.
* @hash: value handed to nexthop_select_path() to choose the path
*/
static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
{
struct nexthop *nh = res->f6i->nh;
struct nh_info *nhi;

/* from here on nh is the selected path, not the original object */
nh = nexthop_select_path(nh, hash);

nhi = rcu_dereference_rtnl(nh->nh_info);
if (nhi->reject_nh) {
/* reject nexthop: report the result as a blackhole / reject route */
res->fib6_type = RTN_BLACKHOLE;
res->fib6_flags |= RTF_REJECT;
/* unlike the else branch, res->nh may end up NULL here since
* nexthop_fib6_nh() returns NULL for a non-AF_INET6 nh_info
*/
res->nh = nexthop_fib6_nh(nh);
} else {
res->nh = &nhi->fib6_nh;
}
}
#endif
44 changes: 44 additions & 0 deletions net/ipv4/nexthop.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ static struct nexthop *nexthop_alloc(void)
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
if (nh) {
INIT_LIST_HEAD(&nh->fi_list);
INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
}
return nh;
Expand Down Expand Up @@ -516,6 +517,41 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);

/* fib6_check_nexthop - check that a nexthop object may back an IPv6 route
* @nh: nexthop object the route wants to reference
* @cfg: route configuration; only cfg->fc_src is examined here
* @extack: netlink extended ack that receives the error message on failure
*
* Returns 0 when the nexthop is acceptable, -EINVAL when the config uses
* a source address (fc_src is not the any-address) or when the nexthop is
* IPv4: a group with has_v4 set, or a single nexthop whose nh_info family
* is AF_INET.
*
* nh_grp / nh_info are read with rtnl_dereference(); presumably the caller
* holds RTNL -- confirm at call sites.
*/
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct nh_info *nhi;

/* fib6_src is unique to a fib6_info and limits the ability to cache
* routes in fib6_nh within a nexthop that is potentially shared
* across multiple fib entries. If the config wants to use source
* routing it can not use nexthop objects. mlxsw also does not allow
* fib6_src on routes.
*/
if (!ipv6_addr_any(&cfg->fc_src)) {
NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
return -EINVAL;
}

if (nh->is_group) {
struct nh_group *nhg;

/* a group is rejected as soon as it is flagged as having v4 members */
nhg = rtnl_dereference(nh->nh_grp);
if (nhg->has_v4)
goto no_v4_nh;
} else {
nhi = rtnl_dereference(nh->nh_info);
if (nhi->family == AF_INET)
goto no_v4_nh;
}

return 0;
no_v4_nh:
NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
return -EINVAL;
}
EXPORT_SYMBOL_GPL(fib6_check_nexthop);

static int nexthop_check_scope(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
Expand Down Expand Up @@ -658,6 +694,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)

static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
struct fib6_info *f6i, *tmp;
bool do_flush = false;
struct fib_info *fi;

Expand All @@ -667,6 +704,13 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
}
if (do_flush)
fib_flush(net);

/* ip6_del_rt removes the entry from this list hence the _safe */
list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
ipv6_stub->ip6_del_rt(net, f6i);
}
}

static void __remove_nexthop(struct net *net, struct nexthop *nh,
Expand Down
5 changes: 5 additions & 0 deletions net/ipv6/addrconf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2421,6 +2421,10 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;

for_each_fib6_node_rt_rcu(fn) {
/* prefix routes only use builtin fib6_nh */
if (rt->nh)
continue;

if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
continue;
if (no_gw && rt->fib6_nh->fib_nh_gw_family)
Expand Down Expand Up @@ -6352,6 +6356,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
/* host routes only use builtin fib6_nh */
struct fib6_nh *nh = ifa->rt->fib6_nh;
int cpu;

Expand Down
22 changes: 18 additions & 4 deletions net/ipv6/ip6_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
if (!f6i)
return NULL;

/* fib6_siblings is a union with nh_list, so this initializes both */
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);

Expand All @@ -171,7 +172,11 @@ void fib6_info_destroy_rcu(struct rcu_head *head)

WARN_ON(f6i->fib6_node);

fib6_nh_release(f6i->fib6_nh);
if (f6i->nh)
nexthop_put(f6i->nh);
else
fib6_nh_release(f6i->fib6_nh);

ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
Expand Down Expand Up @@ -927,6 +932,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,

fib6_drop_pcpu_from(rt, table);

if (rt->nh && !list_empty(&rt->nh_list))
list_del_init(&rt->nh_list);

if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
Expand Down Expand Up @@ -1334,6 +1342,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,

err = fib6_add_rt2node(fn, rt, info, extack);
if (!err) {
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
}
Expand Down Expand Up @@ -2295,24 +2305,28 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
struct fib6_nh *fib6_nh = rt->fib6_nh;
unsigned int flags = rt->fib6_flags;
const struct net_device *dev;

if (rt->nh)
fib6_nh = nexthop_fib6_nh(rt->nh);

seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
if (rt->fib6_nh->fib_nh_gw_family) {
if (fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}

dev = rt->fib6_nh->fib_nh_dev;
dev = fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
Expand Down
3 changes: 1 addition & 2 deletions net/ipv6/ndisc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1289,9 +1289,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
!in6_dev->cnf.accept_ra_rtr_pref)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif

/* routes added from RAs do not use nexthop objects */
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);

if (rt) {
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
Expand Down
Loading

0 comments on commit f88d8ea

Please sign in to comment.