Skip to content

Commit

Permalink
fabtests/common: separate RX and RMA counters
Browse files Browse the repository at this point in the history
Previously, the same counter was used for receives and for
remote reads and writes. This caused an issue when using
counters with RMA tests since synchronization calls check
for completions up to rx_seq which are only incremented
when receives are posted. This allowed synchronization calls
to pass before they were ready.

Separating the counters, allows the synchronization calls to
properly compare the rx counter value with the rx_seq for
the posted receives only and not the RMA test operations.

The RMA trigger test is updated to remove unintuitive logic
to account for this discrepancy.

The RMA event test is update to explicitly call the counter
wait call with the rma counter instead of going through the
general completion call.

Signed-off-by: Alexia Ingerson <[email protected]>
  • Loading branch information
aingerson committed Jul 7, 2023
1 parent 72d71e7 commit 170d2a4
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 34 deletions.
34 changes: 24 additions & 10 deletions fabtests/common/shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct fid_poll *pollset;
struct fid_pep *pep;
struct fid_ep *ep, *alias_ep;
struct fid_cq *txcq, *rxcq;
struct fid_cntr *txcntr, *rxcntr;
struct fid_cntr *txcntr, *rxcntr, *rma_cntr;

struct fid_ep *srx;
struct fid_stx *stx;
struct fid_mr *mr;
Expand Down Expand Up @@ -691,7 +692,8 @@ int ft_open_fabric_res(void)

int ft_alloc_ep_res(struct fi_info *fi, struct fid_cq **new_txcq,
struct fid_cq **new_rxcq, struct fid_cntr **new_txcntr,
struct fid_cntr **new_rxcntr)
struct fid_cntr **new_rxcntr,
struct fid_cntr **new_rma_cntr)
{
int ret;

Expand Down Expand Up @@ -766,6 +768,14 @@ int ft_alloc_ep_res(struct fi_info *fi, struct fid_cq **new_txcq,
FT_PRINTERR("fi_cntr_open", ret);
return ret;
}

if (fi->caps & FI_RMA) {
ret = ft_cntr_open(new_rma_cntr);
if (ret) {
FT_PRINTERR("fi_cntr_open", ret);
return ret;
}
}
}

if (!av && (fi->ep_attr->type == FI_EP_RDM || fi->ep_attr->type == FI_EP_DGRAM)) {
Expand All @@ -788,7 +798,7 @@ int ft_alloc_ep_res(struct fi_info *fi, struct fid_cq **new_txcq,
int ft_alloc_active_res(struct fi_info *fi)
{
int ret;
ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr);
ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr, &rma_cntr);
if (ret)
return ret;

Expand Down Expand Up @@ -1261,7 +1271,8 @@ int ft_init_alias_ep(uint64_t flags)

int ft_enable_ep(struct fid_ep *bind_ep, struct fid_eq *bind_eq, struct fid_av *bind_av,
struct fid_cq *bind_txcq, struct fid_cq *bind_rxcq,
struct fid_cntr *bind_txcntr, struct fid_cntr *bind_rxcntr)
struct fid_cntr *bind_txcntr, struct fid_cntr *bind_rxcntr,
struct fid_cntr *bind_rma_cntr)
{
uint64_t flags;
int ret;
Expand Down Expand Up @@ -1306,12 +1317,14 @@ int ft_enable_ep(struct fid_ep *bind_ep, struct fid_eq *bind_eq, struct fid_av *
flags = 0;
else
flags = FI_RECV;
if (hints->caps & (FI_REMOTE_WRITE | FI_REMOTE_READ))
flags |= hints->caps & (FI_REMOTE_WRITE | FI_REMOTE_READ);
else if (hints->caps & FI_RMA)
flags |= FI_REMOTE_WRITE | FI_REMOTE_READ;

FT_EP_BIND(bind_ep, bind_rxcntr, flags);

if (hints->caps & (FI_RMA | FI_ATOMICS) && hints->caps & FI_RMA_EVENT) {
flags = fi->caps & (FI_REMOTE_WRITE | FI_REMOTE_READ);
FT_EP_BIND(bind_ep, bind_rma_cntr, flags);
}

ret = fi_enable(bind_ep);
if (ret) {
FT_PRINTERR("fi_enable", ret);
Expand All @@ -1325,7 +1338,7 @@ int ft_enable_ep_recv(void)
{
int ret;

ret = ft_enable_ep(ep, eq, av, txcq, rxcq, txcntr, rxcntr);
ret = ft_enable_ep(ep, eq, av, txcq, rxcq, txcntr, rxcntr, rma_cntr);
if (ret)
return ret;

Expand Down Expand Up @@ -1684,6 +1697,7 @@ void ft_close_fids(void)
}
FT_CLOSE_FID(rxcntr);
FT_CLOSE_FID(txcntr);
FT_CLOSE_FID(rma_cntr);
FT_CLOSE_FID(pollset);
if (mr != &no_mr)
FT_CLOSE_FID(mr);
Expand Down Expand Up @@ -2508,7 +2522,7 @@ static int ft_wait_for_cntr(struct fid_cntr *cntr, uint64_t total, int timeout)
return 0;
}

static int ft_get_cntr_comp(struct fid_cntr *cntr, uint64_t total, int timeout)
int ft_get_cntr_comp(struct fid_cntr *cntr, uint64_t total, int timeout)
{
int ret = 0;

Expand Down
12 changes: 6 additions & 6 deletions fabtests/functional/multi_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,12 @@ static int setup_client_ep(int idx)
return ret;
}

ret = ft_alloc_ep_res(fi, &txcqs[idx], &rxcqs[idx], NULL, NULL);
ret = ft_alloc_ep_res(fi, &txcqs[idx], &rxcqs[idx], NULL, NULL, NULL);
if (ret)
return ret;

ret = ft_enable_ep(eps[idx], eq, av, txcqs[idx], rxcqs[idx],
NULL, NULL);
NULL, NULL, NULL);
if (ret)
return ret;

Expand All @@ -242,12 +242,12 @@ static int setup_server_ep(int idx)
goto failed_accept;
}

ret = ft_alloc_ep_res(fi, &txcqs[idx], &rxcqs[idx], NULL, NULL);
ret = ft_alloc_ep_res(fi, &txcqs[idx], &rxcqs[idx], NULL, NULL, NULL);
if (ret)
return ret;

ret = ft_enable_ep(eps[idx], eq, av, txcqs[idx], rxcqs[idx],
NULL, NULL);
NULL, NULL, NULL);
if (ret)
goto failed_accept;

Expand Down Expand Up @@ -285,7 +285,7 @@ static int setup_av_ep(int idx)
return ret;
}

ret = ft_alloc_ep_res(fi, &txcqs[idx], &rxcqs[idx], NULL, NULL);
ret = ft_alloc_ep_res(fi, &txcqs[idx], &rxcqs[idx], NULL, NULL, NULL);
if (ret)
return ret;

Expand All @@ -297,7 +297,7 @@ static int enable_ep(int idx)
int ret;

ret = ft_enable_ep(eps[idx], eq, av, txcqs[idx], rxcqs[idx],
NULL, NULL);
NULL, NULL, NULL);
if (ret)
return ret;

Expand Down
2 changes: 2 additions & 0 deletions fabtests/functional/rdm_atomic.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,8 @@ static int run_op(void)
break;
}


ft_sync();
free(count);
fn:
return ret;
Expand Down
4 changes: 2 additions & 2 deletions fabtests/functional/rdm_rma_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@ static int run_test(void)
if (ret)
return ret;

ret = ft_get_tx_comp(++tx_seq);
ret = ft_get_cntr_comp(txcntr, ++tx_seq, timeout);
if (ret)
return ret;

fprintf(stdout, "Received a completion event for RMA write\n");
} else {
ret = ft_get_rx_comp(rx_seq);
ret = ft_get_cntr_comp(rma_cntr, 1, timeout);
if (ret)
return ret;

Expand Down
9 changes: 4 additions & 5 deletions fabtests/functional/rdm_rma_trigger.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static int rma_write_trigger(void *src, size_t size,
static int run_test(void)
{
int ret = 0;
uint64_t start_tx, start_rx;
uint64_t start_tx;

ret = ft_init_fabric();
if (ret)
Expand All @@ -77,7 +77,6 @@ static int run_test(void)
return ret;

start_tx = fi_cntr_read(txcntr);
start_rx = fi_cntr_read(rxcntr);

ft_sync();
if (opts.dst_addr) {
Expand Down Expand Up @@ -107,9 +106,9 @@ static int run_test(void)

fprintf(stdout, "Received completion events for RMA write operations\n");
} else {
/* The value of the rx counter should have increased by 2
* for both operations (write and triggered) */
ret = fi_cntr_wait(rxcntr, start_rx + 2, -1);
/* The value of the rma counter should have increased by 2
* for both remote operations (write and triggered) */
ret = fi_cntr_wait(rma_cntr, 2, -1);
if (ret < 0) {
FT_PRINTERR("fi_cntr_wait", ret);
goto out;
Expand Down
2 changes: 1 addition & 1 deletion fabtests/functional/scalable_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ static int alloc_ep_res(struct fid_ep *sep)

av_attr.rx_ctx_bits = rx_ctx_bits;

ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr);
ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr, NULL);
if (ret)
return ret;

Expand Down
13 changes: 8 additions & 5 deletions fabtests/functional/shared_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ static int enable_eps(void)
{
int i, ret;
for (i = 0; i < ep_cnt; i++) {
ret = ft_enable_ep(ep_array[i], eq, av, txcq, rxcq, txcntr, rxcntr);
ret = ft_enable_ep(ep_array[i], eq, av, txcq, rxcq, txcntr,
rxcntr, rma_cntr);
if (ret)
return ret;
}
Expand Down Expand Up @@ -294,7 +295,7 @@ static int init_fabric(void)

av_attr.count = ep_cnt;

ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr);
ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr, NULL);
if (ret)
return ret;

Expand Down Expand Up @@ -349,7 +350,7 @@ static int client_connect(void)
if (ret)
return ret;

ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr);
ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr, NULL);
if (ret)
return ret;

Expand Down Expand Up @@ -434,7 +435,8 @@ static int server_connect(void)
if (ret)
goto err;

ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr, &rxcntr);
ret = ft_alloc_ep_res(fi, &txcq, &rxcq, &txcntr,
&rxcntr, NULL);
if (ret)
goto err;
}
Expand All @@ -446,7 +448,8 @@ static int server_connect(void)
}

ep_state_array[num_conn_reqs].ep = ep_array[num_conn_reqs];
ret = ft_enable_ep(ep_array[num_conn_reqs], eq, av, txcq, rxcq, txcntr, rxcntr);
ret = ft_enable_ep(ep_array[num_conn_reqs], eq, av,
txcq, rxcq, txcntr, rxcntr, rma_cntr);
if (ret)
goto err;

Expand Down
10 changes: 7 additions & 3 deletions fabtests/include/shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ extern struct fid_poll *pollset;
extern struct fid_pep *pep;
extern struct fid_ep *ep, *alias_ep;
extern struct fid_cq *txcq, *rxcq;
extern struct fid_cntr *txcntr, *rxcntr;
extern struct fid_cntr *txcntr, *rxcntr, *rma_cntr;
extern struct fid_ep *srx;
extern struct fid_stx *stx;
extern struct fid_mr *mr, no_mr;
Expand Down Expand Up @@ -426,15 +426,17 @@ int ft_connect_ep(struct fid_ep *ep,
struct fid_eq *eq, fi_addr_t *remote_addr);
int ft_alloc_ep_res(struct fi_info *fi, struct fid_cq **new_txcq,
struct fid_cq **new_rxcq, struct fid_cntr **new_txcntr,
struct fid_cntr **new_rxcntr);
struct fid_cntr **new_rxcntr,
struct fid_cntr **new_rma_cntr);
int ft_alloc_msgs(void);
int ft_alloc_host_tx_buf(size_t size);
void ft_free_host_tx_buf(void);
int ft_alloc_active_res(struct fi_info *fi);
int ft_enable_ep_recv(void);
int ft_enable_ep(struct fid_ep *bind_ep, struct fid_eq *bind_eq, struct fid_av *bind_av,
struct fid_cq *bind_txcq, struct fid_cq *bind_rxcq,
struct fid_cntr *bind_txcntr, struct fid_cntr *bind_rxcntr);
struct fid_cntr *bind_txcntr, struct fid_cntr *bind_rxcntr,
struct fid_cntr *bind_rma_cntr);

int ft_init_alias_ep(uint64_t flags);
int ft_av_insert(struct fid_av *av, void *addr, size_t count, fi_addr_t *fi_addr,
Expand Down Expand Up @@ -572,6 +574,8 @@ int ft_cq_readerr(struct fid_cq *cq);
int ft_get_rx_comp(uint64_t total);
int ft_get_tx_comp(uint64_t total);
int ft_get_cq_comp(struct fid_cq *cq, uint64_t *cur, uint64_t total, int timeout);
int ft_get_cntr_comp(struct fid_cntr *cntr, uint64_t total, int timeout);

int ft_recvmsg(struct fid_ep *ep, fi_addr_t fi_addr,
size_t size, void *ctx, int flags);
int ft_sendmsg(struct fid_ep *ep, fi_addr_t fi_addr,
Expand Down
2 changes: 1 addition & 1 deletion fabtests/multinode/src/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static int multi_setup_fabric(int argc, char **argv)
if (ret)
return ret;

ret = ft_enable_ep(ep, eq, av, txcq, rxcq, txcntr, rxcntr);
ret = ft_enable_ep(ep, eq, av, txcq, rxcq, txcntr, rxcntr, rma_cntr);
if (ret)
return ret;

Expand Down
2 changes: 1 addition & 1 deletion fabtests/multinode/src/core_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ static int multinode_setup_fabric(int argc, char **argv)
if (err)
return err;

err = ft_enable_ep(ep, eq, av, txcq, rxcq, txcntr, rxcntr);
err = ft_enable_ep(ep, eq, av, txcq, rxcq, txcntr, rxcntr, rma_cntr);
if (err)
return err;

Expand Down

0 comments on commit 170d2a4

Please sign in to comment.