Skip to content

Commit

Permalink
Merge branch 'devlink-deadlock'
Browse files Browse the repository at this point in the history
Jiri Pirko says:

====================
devlink: fix a deadlock when taking devlink instance lock while holding RTNL lock

devlink_port_fill() may be called sometimes with RTNL lock held.
When putting the nested port function devlink instance attrs,
current code takes nested devlink instance lock. In that case lock
ordering is wrong.

Patch #1 is a dependency of patch #2.
Patch #2 converts the peernet2id_alloc() call to rely in RCU so it could
         called without devlink instance lock.
Patch #3 takes device reference for devlink instance making sure that
         device does not disappear before devlink_release() is called.
Patch #4 benefits from the preparations done in patches #2 and #3 and
         removes the problematic nested devlink lock aquisition.
Patched #5-torvalds#7 improve documentation to reflect this issue so it is
              avoided in the future.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Oct 18, 2023
2 parents ee2a35f + 5d77371 commit a0a8602
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 21 deletions.
28 changes: 28 additions & 0 deletions Documentation/networking/devlink/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,34 @@ netlink commands.

Drivers are encouraged to use the devlink instance lock for their own needs.

Drivers need to be cautious when taking devlink instance lock and
taking RTNL lock at the same time. Devlink instance lock needs to be taken
first, only after that RTNL lock could be taken.

Nested instances
----------------

Some objects, like linecards or port functions, could have another
devlink instances created underneath. In that case, drivers should make
sure to respect following rules:

- Lock ordering should be maintained. If driver needs to take instance
lock of both nested and parent instances at the same time, devlink
instance lock of the parent instance should be taken first, only then
instance lock of the nested instance could be taken.
- Driver should use object-specific helpers to setup the
nested relationship:

- ``devl_nested_devlink_set()`` - called to setup devlink -> nested
devlink relationship (could be user for multiple nested instances.
- ``devl_port_fn_devlink_set()`` - called to setup port function ->
nested devlink relationship.
- ``devlink_linecard_nested_dl_set()`` - called to setup linecard ->
nested devlink relationship.

The nested devlink info is exposed to the userspace over object-specific
attributes of devlink netlink.

Interface documentation
-----------------------

Expand Down
15 changes: 12 additions & 3 deletions include/net/net_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,21 +368,30 @@ static inline void put_net_track(struct net *net, netns_tracker *tracker)

typedef struct {
#ifdef CONFIG_NET_NS
struct net *net;
struct net __rcu *net;
#endif
} possible_net_t;

static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
pnet->net = net;
rcu_assign_pointer(pnet->net, net);
#endif
}

static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
return pnet->net;
return rcu_dereference_protected(pnet->net, true);
#else
return &init_net;
#endif
}

static inline struct net *read_pnet_rcu(possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
return rcu_dereference(pnet->net);
#else
return &init_net;
#endif
Expand Down
34 changes: 19 additions & 15 deletions net/devlink/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,20 @@ int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
return 0;
}

/**
* devlink_rel_nested_in_notify - Notify the object this devlink
* instance is nested in.
* @devlink: devlink
*
* This is called upon network namespace change of devlink instance.
* In case this devlink instance is nested in another devlink object,
* a notification of a change of this object should be sent
* over netlink. The parent devlink instance lock needs to be
* taken during the notification preparation.
* However, since the devlink lock of nested instance is held here,
* we would end with wrong devlink instance lock ordering and
* deadlock. Therefore the work is utilized to avoid that.
*/
void devlink_rel_nested_in_notify(struct devlink *devlink)
{
struct devlink_rel *rel = devlink->rel;
Expand All @@ -183,9 +197,8 @@ static struct devlink_rel *devlink_rel_find(unsigned long rel_index)
DEVLINK_REL_IN_USE);
}

static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index)
static struct devlink *devlink_rel_devlink_get(u32 rel_index)
{
struct devlink *devlink;
struct devlink_rel *rel;
u32 devlink_index;

Expand All @@ -198,16 +211,7 @@ static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index)
xa_unlock(&devlink_rels);
if (!rel)
return NULL;
devlink = devlinks_xa_get(devlink_index);
if (!devlink)
return NULL;
devl_lock(devlink);
if (!devl_is_registered(devlink)) {
devl_unlock(devlink);
devlink_put(devlink);
return NULL;
}
return devlink;
return devlinks_xa_get(devlink_index);
}

int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
Expand All @@ -218,11 +222,10 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
struct devlink *rel_devlink;
int err;

rel_devlink = devlink_rel_devlink_get_lock(rel_index);
rel_devlink = devlink_rel_devlink_get(rel_index);
if (!rel_devlink)
return 0;
err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype);
devl_unlock(rel_devlink);
devlink_put(rel_devlink);
if (!err && msg_updated)
*msg_updated = true;
Expand Down Expand Up @@ -310,6 +313,7 @@ static void devlink_release(struct work_struct *work)

mutex_destroy(&devlink->lock);
lockdep_unregister_key(&devlink->lock_key);
put_device(devlink->dev);
kfree(devlink);
}

Expand Down Expand Up @@ -425,7 +429,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (ret < 0)
goto err_xa_alloc;

devlink->dev = dev;
devlink->dev = get_device(dev);
devlink->ops = ops;
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
Expand Down
12 changes: 9 additions & 3 deletions net/devlink/netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,24 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype)
{
struct nlattr *nested_attr;
struct net *devl_net;

nested_attr = nla_nest_start(msg, attrtype);
if (!nested_attr)
return -EMSGSIZE;
if (devlink_nl_put_handle(msg, devlink))
goto nla_put_failure;
if (!net_eq(net, devlink_net(devlink))) {
int id = peernet2id_alloc(net, devlink_net(devlink),
GFP_KERNEL);

rcu_read_lock();
devl_net = read_pnet_rcu(&devlink->_net);
if (!net_eq(net, devl_net)) {
int id = peernet2id_alloc(net, devl_net, GFP_ATOMIC);

rcu_read_unlock();
if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id))
return -EMSGSIZE;
} else {
rcu_read_unlock();
}

nla_nest_end(msg, nested_attr);
Expand Down

0 comments on commit a0a8602

Please sign in to comment.