Skip to content

Commit

Permalink
Merge branch 'net-smc-virt-contig-buffers'
Browse files Browse the repository at this point in the history
Wen Gu says:

====================
net/smc: Introduce virtually contiguous buffers for SMC-R

On long-running enterprise production servers, high-order contiguous
memory pages are usually very rare and in most cases we can only get
fragmented pages.

When replacing TCP with SMC-R in such production scenarios, attempting
to allocate high-order physically contiguous sndbufs and RMBs may result
in frequent memory compaction, which can cause unexpected hangs
and further stability risks.

So this patch set aims to allow SMC-R link groups to use virtually
contiguous sndbufs and RMBs to avoid the potential issues mentioned above.
Whether to use physically or virtually contiguous buffers can be set
by sysctl smcr_buf_type.

Note that using virtually contiguous buffers will bring an acceptable
performance regression, which can be mainly divided into two parts:

1) regression in data path, which is brought by additional address
   translation of sndbuf by RNIC in Tx. But in general, translating
   address through MTT is fast. According to qperf tests, this part of
   the regression is generally less than 10% in latency and bandwidth.
   (see patch 5/6 for details)

2) regression in buffer initialization and destruction path, which is
   brought by additional MR operations on sndbufs. But thanks to the link
   group buffer reuse mechanism, the impact of this kind of regression
   decreases as the number of buffer reuses increases.

Patch set overview:
- Patches 1/6 and 2/6 mainly simplify and optimize the DMA sync
  operations, which will reduce overhead on the data path, especially
  when using virtually contiguous buffers;
- Patches 3/6 and 4/6 introduce a sysctl smcr_buf_type to set the type
  of buffers in newly created link groups;
- Patch 5/6 allows SMC-R to use virtually contiguous sndbufs and RMBs,
  including buffer creation, destruction, MR operation and access;
- Patch 6/6 extends the netlink attributes with the buffer type of an SMC-R link group.

v1->v2:
- Patch 5/6 fixes build issue on 32bit;
- Patch 3/6 adds description of new sysctl in smc-sysctl.rst;
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jul 18, 2022
2 parents 2acd102 + ddefb2d commit 3898f52
Show file tree
Hide file tree
Showing 14 changed files with 404 additions and 147 deletions.
13 changes: 13 additions & 0 deletions Documentation/networking/smc-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,16 @@ autocorking_size - INTEGER
know how/when to uncork their sockets.

Default: 64K

smcr_buf_type - INTEGER
Controls which type of sndbufs and RMBs to use in subsequently created
SMC-R link groups. Only for SMC-R.

Default: 0 (physically contiguous sndbufs and RMBs)

Possible values:

- 0 - Use physically contiguous buffers
- 1 - Use virtually contiguous buffers
- 2 - Mixed use of the two types. Try physically contiguous buffers first.
If not available, fall back to virtually contiguous buffers.
1 change: 1 addition & 0 deletions include/net/netns/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ struct netns_smc {
struct ctl_table_header *smc_hdr;
#endif
unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type;
};
#endif
1 change: 1 addition & 0 deletions include/uapi/linux/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ enum {
SMC_NLA_LGR_R_V2, /* nest */
SMC_NLA_LGR_R_NET_COOKIE, /* u64 */
SMC_NLA_LGR_R_PAD, /* flag */
SMC_NLA_LGR_R_BUF_TYPE, /* u8 */
__SMC_NLA_LGR_R_MAX,
SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
};
Expand Down
68 changes: 58 additions & 10 deletions net/smc/af_smc.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,29 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* Register a newly vzalloced sndbuf on every active link of the link group.
 *
 * @link:     a link whose link group (link->lgr) is walked
 * @snd_desc: descriptor of the send buffer to register
 *
 * Returns -EINVAL if @snd_desc is not flagged is_vm. Otherwise returns 0, or
 * the first non-zero result of smcr_link_reg_buf(), at which point the walk
 * stops.
 */
static int smcr_lgr_reg_sndbufs(struct smc_link *link,
				struct smc_buf_desc *snd_desc)
{
	struct smc_link_group *grp = link->lgr;
	int idx = 0;
	int ret = 0;

	/* only buffers flagged is_vm are handled here */
	if (!snd_desc->is_vm)
		return -EINVAL;

	/* protect against parallel smcr_link_reg_buf() */
	mutex_lock(&grp->llc_conf_mutex);
	while (idx < SMC_LINKS_PER_LGR_MAX && !ret) {
		struct smc_link *cur = &grp->lnk[idx++];

		/* inactive links are skipped, they do not end the walk */
		if (smc_link_active(cur))
			ret = smcr_link_reg_buf(cur, snd_desc);
	}
	mutex_unlock(&grp->llc_conf_mutex);

	return ret;
}

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
Expand All @@ -498,13 +521,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
return rc;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_rmb()
* parallel smcr_link_reg_buf()
*/
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
if (rc)
goto out;
}
Expand Down Expand Up @@ -550,8 +573,15 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)

smc_wr_remember_qp_attr(link);

if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
/* reg the sndbuf if it was vzalloced */
if (smc->conn.sndbuf_desc->is_vm) {
if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
return SMC_CLC_DECL_ERR_REGBUF;
}

/* reg the rmb */
if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGBUF;

/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
Expand Down Expand Up @@ -1221,12 +1251,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
goto connect_abort;
}
} else {
/* reg sendbufs if they were vzalloced */
if (smc->conn.sndbuf_desc->is_vm) {
if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
reason_code = SMC_CLC_DECL_ERR_REGBUF;
goto connect_abort;
}
}
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
reason_code = SMC_CLC_DECL_ERR_REGRMB;
reason_code = SMC_CLC_DECL_ERR_REGBUF;
goto connect_abort;
}
}
smc_rmb_sync_sg_for_device(&smc->conn);

if (aclc->hdr.version > SMC_V1) {
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
Expand Down Expand Up @@ -1750,8 +1786,15 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
struct smc_llc_qentry *qentry;
int rc;

if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
/* reg the sndbuf if it was vzalloced*/
if (smc->conn.sndbuf_desc->is_vm) {
if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
return SMC_CLC_DECL_ERR_REGBUF;
}

/* reg the rmb */
if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGBUF;

/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
Expand Down Expand Up @@ -2110,10 +2153,15 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
struct smc_connection *conn = &new_smc->conn;

if (!local_first) {
/* reg sendbufs if they were vzalloced */
if (conn->sndbuf_desc->is_vm) {
if (smcr_lgr_reg_sndbufs(conn->lnk,
conn->sndbuf_desc))
return SMC_CLC_DECL_ERR_REGBUF;
}
if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
return SMC_CLC_DECL_ERR_REGBUF;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);

return 0;
}
Expand Down
8 changes: 5 additions & 3 deletions net/smc/smc_clc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1034,7 +1034,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
ETH_ALEN);
hton24(clc->r0.qpn, link->roce_qp->qp_num);
clc->r0.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
switch (clc->hdr.type) {
Expand All @@ -1046,8 +1046,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
break;
}
clc->r0.rmbe_size = conn->rmbe_size_short;
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
if (version == SMC_V1) {
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
Expand Down
2 changes: 1 addition & 1 deletion net/smc/smc_clc.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
#define SMC_CLC_DECL_ERR_REGBUF 0x09990003 /* reg rdma bufs failed */

#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */

Expand Down
Loading

0 comments on commit 3898f52

Please sign in to comment.