IB/cma: Add configfs for rdma_cm
Users would like to control the behaviour of rdma_cm. For example,
old applications that do not set the required RoCE GID type should
still be able to run on RoCE v2 networks. In order to support this
configuration, implement a configfs interface for rdma_cm.

In order to use the configfs interface, mount it and create a
directory named after the IB device (mkdir <IB device name>) inside
the rdma_cm directory.

The patch adds support for a single configuration file,
default_roce_mode. The mode can be either "IB/RoCE v1" or
"RoCE v2".

Signed-off-by: Matan Barak <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
matanb10 authored and dledford committed Dec 23, 2015
1 parent 218a773 commit 045959d
Showing 7 changed files with 504 additions and 7 deletions.
22 changes: 22 additions & 0 deletions Documentation/ABI/testing/configfs-rdma_cm
@@ -0,0 +1,22 @@
What: /config/rdma_cm
Date: November 29, 2015
KernelVersion: 4.4.0
Description: Interface used to configure RDMA-capable HCAs with respect
to RDMA-CM attributes.

Attributes are visible only when configfs is mounted. To mount
configfs in the /config directory, use:
# mount -t configfs none /config/

In order to set parameters related to a specific HCA, a directory
for this HCA has to be created:
mkdir -p /config/rdma_cm/<hca>


What: /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
Date: November 29, 2015
KernelVersion: 4.4.0
Description: RDMA-CM based connections from HCA <hca> at port <port-num>
will be initiated with this RoCE type as default.
The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
This parameter has RW access.
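
As an illustration of the documented workflow (the HCA name mlx4_0 and
port number 1 below are hypothetical examples, and the read-back line
assumes the attribute echoes the configured type string), a shell
session could look like:

# mount -t configfs none /config/
# mkdir -p /config/rdma_cm/mlx4_0
# echo "RoCE v2" > /config/rdma_cm/mlx4_0/ports/1/default_roce_mode
# cat /config/rdma_cm/mlx4_0/ports/1/default_roce_mode
RoCE v2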
9 changes: 9 additions & 0 deletions drivers/infiniband/Kconfig
@@ -55,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
depends on INFINIBAND
default y

config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
default y
---help---
ConfigFS support for the RDMA communication manager (CM).
This allows the user to configure the default GID type that the CM
uses for each device when initiating new connections.

source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
2 changes: 2 additions & 0 deletions drivers/infiniband/core/Makefile
@@ -24,6 +24,8 @@ iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o

rdma_cm-y := cma.o

rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o

rdma_ucm-y := ucma.o

ib_addr-y := addr.o
24 changes: 24 additions & 0 deletions drivers/infiniband/core/cache.c
@@ -140,6 +140,30 @@ const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

int ib_cache_gid_parse_type_str(const char *buf)
{
unsigned int i;
size_t len;
int err = -EINVAL;

len = strlen(buf);
if (len == 0)
return -EINVAL;

if (buf[len - 1] == '\n')
len--;

for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
len == strlen(gid_type_str[i])) {
err = i;
break;
}

return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);

/* This function expects that rwlock will be write locked in all
* scenarios and that lock will be locked in sleep-able (RoCE)
* scenarios.
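
As an aside, a minimal sketch of a caller of the new
ib_cache_gid_parse_type_str() helper (the wrapper below is hypothetical
and not part of this patch; the helper's prototype is assumed to be
exported by a header updated elsewhere in the commit but not rendered
here):

/* Hypothetical caller of ib_cache_gid_parse_type_str(): turn a
 * user-supplied string such as "IB/RoCE v1" or "RoCE v2" (with or
 * without a trailing newline) into an enum ib_gid_type value.
 */
static int example_parse_gid_type(const char *buf,
                                  enum ib_gid_type *gid_type)
{
        int ret = ib_cache_gid_parse_type_str(buf);

        if (ret < 0)
                return ret;     /* string did not match any known GID type */

        *gid_type = ret;
        return 0;
}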
108 changes: 101 additions & 7 deletions drivers/infiniband/core/cma.c
@@ -152,6 +152,7 @@ struct cma_device {
struct completion comp;
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
};

struct rdma_bind_list {
@@ -192,6 +193,62 @@ void cma_ref_dev(struct cma_device *cma_dev)
atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
void *cookie)
{
struct cma_device *cma_dev;
struct cma_device *found_cma_dev = NULL;

mutex_lock(&lock);

list_for_each_entry(cma_dev, &dev_list, list)
if (filter(cma_dev->device, cookie)) {
found_cma_dev = cma_dev;
break;
}

if (found_cma_dev)
cma_ref_dev(found_cma_dev);
mutex_unlock(&lock);
return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
if (port < rdma_start_port(cma_dev->device) ||
port > rdma_end_port(cma_dev->device))
return -EINVAL;

return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type)
{
unsigned long supported_gids;

if (port < rdma_start_port(cma_dev->device) ||
port > rdma_end_port(cma_dev->device))
return -EINVAL;

supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

if (!(supported_gids & 1 << default_gid_type))
return -EINVAL;

cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
default_gid_type;

return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
}

/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -343,17 +400,27 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
_cma_attach_to_dev(id_priv, cma_dev);
id_priv->gid_type =
cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
@@ -449,6 +516,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}

static inline int cma_validate_port(struct ib_device *device, u8 port,
enum ib_gid_type gid_type,
union ib_gid *gid, int dev_type,
int bound_if_index)
{
@@ -474,9 +542,11 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if (!ndev)
return -ENODEV;
}
} else {
gid_type = IB_GID_TYPE_IB;
}

ret = ib_find_cached_gid_by_port(device, gid, IB_GID_TYPE_IB, port,
ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, NULL);

if (ndev)
@@ -511,7 +581,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;

ret = cma_validate_port(cma_dev->device, port, gidp,
ret = cma_validate_port(cma_dev->device, port,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
listen_id_priv->gid_type, gidp,
dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
@@ -530,8 +603,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;

ret = cma_validate_port(cma_dev->device, port, gidp,
dev_addr->dev_type,
ret = cma_validate_port(cma_dev->device, port,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
cma_dev->default_gid_type[port - 1],
gidp, dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
@@ -2062,7 +2138,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));

cma_attach_to_dev(dev_id_priv, cma_dev);
_cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
@@ -3896,12 +3972,27 @@ static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
unsigned int i;
unsigned long supported_gids = 0;

cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;

cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
if (!cma_dev->default_gid_type) {
kfree(cma_dev);
return;
}
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
}

init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
@@ -3981,6 +4072,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);

cma_process_remove(cma_dev);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}

@@ -4114,6 +4206,7 @@ static int __init cma_init(void)

if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
cma_configfs_init();

return 0;

@@ -4128,6 +4221,7 @@ static int __init cma_init(void)

static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
(The remaining file diffs, including the new drivers/infiniband/core/cma_configfs.c, were not loaded in this view.)
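
Purely as a sketch of how a configfs store handler in the new
cma_configfs.c (not rendered above) might combine the helpers added by
this commit; the function names, the simplified calling convention, the
bool return type of the filter callback, and the omission of headers
are assumptions, not the actual implementation:

/* Hypothetical sketch only; this is not the cma_configfs.c added by
 * the commit, whose diff is not shown above.
 */
static bool example_dev_name_filter(struct ib_device *dev, void *cookie)
{
        /* Match the ib_device whose name was used for the configfs mkdir. */
        return !strcmp(dev->name, (const char *)cookie);
}

static ssize_t example_store_default_roce_mode(const char *dev_name,
                                               unsigned int port,
                                               const char *buf, size_t count)
{
        struct cma_device *cma_dev;
        int gid_type, ret;

        /* Parse "IB/RoCE v1" / "RoCE v2" into an enum ib_gid_type. */
        gid_type = ib_cache_gid_parse_type_str(buf);
        if (gid_type < 0)
                return gid_type;

        /* Look up the cma_device by ib_device name; takes a reference. */
        cma_dev = cma_enum_devices_by_ibdev(example_dev_name_filter,
                                            (void *)dev_name);
        if (!cma_dev)
                return -ENODEV;

        /* Fails with -EINVAL if the port does not support this GID type. */
        ret = cma_set_default_gid_type(cma_dev, port, gid_type);

        cma_deref_dev(cma_dev);         /* drop the lookup's reference */
        return ret ? ret : count;
}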
