Skip to content

Commit 6b981e2

Browse files
wangxi11 authored and jgunthorpe committed
RDMA/hns: Clear remaining unused sges when post_recv
HIP09 requires the driver to clear the unused data segments in the WQE buffer so that the hns ROCEE stops reading the remaining invalid sges for the RQ.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Xi Wang <[email protected]>
Signed-off-by: Weihang Li <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 9ae2a37 commit 6b981e2

File tree

1 file changed

+47
-52
lines changed

1 file changed

+47
-52
lines changed

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 47 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -729,28 +729,42 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev,
729729
return 0;
730730
}
731731

732-
static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
733-
u32 wqe_idx)
732+
static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
733+
u32 max_sge, bool rsv)
734734
{
735-
struct hns_roce_v2_wqe_data_seg *dseg;
736-
struct hns_roce_rinl_sge *sge_list;
737-
void *wqe = NULL;
738-
int i;
735+
struct hns_roce_v2_wqe_data_seg *dseg = wqe;
736+
u32 i, cnt;
739737

740-
wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
741-
dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
742-
for (i = 0; i < wr->num_sge; i++) {
738+
for (i = 0, cnt = 0; i < wr->num_sge; i++) {
739+
/* Skip zero-length sge */
743740
if (!wr->sg_list[i].length)
744741
continue;
745-
set_data_seg_v2(dseg, wr->sg_list + i);
746-
dseg++;
742+
set_data_seg_v2(dseg + cnt, wr->sg_list + i);
743+
cnt++;
747744
}
748745

749-
if (hr_qp->rq.rsv_sge) {
750-
dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
751-
dseg->addr = 0;
752-
dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
746+
/* Fill a reserved sge to make hw stop reading remaining segments */
747+
if (rsv) {
748+
dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
749+
dseg[cnt].addr = 0;
750+
dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
751+
} else {
752+
/* Clear remaining segments to make ROCEE ignore sges */
753+
if (cnt < max_sge)
754+
memset(dseg + cnt, 0,
755+
(max_sge - cnt) * HNS_ROCE_SGE_SIZE);
753756
}
757+
}
758+
759+
static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
760+
u32 wqe_idx, u32 max_sge)
761+
{
762+
struct hns_roce_rinl_sge *sge_list;
763+
void *wqe = NULL;
764+
u32 i;
765+
766+
wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
767+
fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
754768

755769
/* rq support inline data */
756770
if (hr_qp->rq_inl_buf.wqe_cnt) {
@@ -801,8 +815,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
801815
}
802816

803817
wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
804-
fill_rq_wqe(hr_qp, wr, wqe_idx);
805-
818+
fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
806819
hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
807820
}
808821

@@ -834,18 +847,18 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
834847
return ret;
835848
}
836849

837-
static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
850+
static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
838851
{
839852
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
840853
}
841854

842-
static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
855+
static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
843856
{
844857
return hns_roce_buf_offset(idx_que->mtr.kmem,
845858
n << idx_que->entry_shift);
846859
}
847860

848-
static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
861+
static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
849862
{
850863
/* always called with interrupts disabled. */
851864
spin_lock(&srq->lock);
@@ -856,7 +869,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
856869
spin_unlock(&srq->lock);
857870
}
858871

859-
int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
872+
int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, u32 nreq)
860873
{
861874
struct hns_roce_idx_que *idx_que = &srq->idx_que;
862875
unsigned int cur;
@@ -865,19 +878,18 @@ int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
865878
return cur + nreq >= srq->wqe_cnt;
866879
}
867880

868-
static int find_empty_entry(struct hns_roce_idx_que *idx_que,
869-
unsigned long size)
881+
static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
870882
{
871-
int wqe_idx;
883+
struct hns_roce_idx_que *idx_que = &srq->idx_que;
884+
u32 pos;
872885

873-
if (unlikely(bitmap_full(idx_que->bitmap, size)))
886+
pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
887+
if (unlikely(pos == srq->wqe_cnt))
874888
return -ENOSPC;
875889

876-
wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
877-
878-
bitmap_set(idx_que->bitmap, wqe_idx, 1);
879-
880-
return wqe_idx;
890+
bitmap_set(idx_que->bitmap, pos, 1);
891+
*wqe_idx = pos;
892+
return 0;
881893
}
882894

883895
static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
@@ -886,17 +898,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
886898
{
887899
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
888900
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
889-
struct hns_roce_v2_wqe_data_seg *dseg;
901+
u32 wqe_idx, ind, nreq, max_sge;
890902
struct hns_roce_v2_db srq_db;
891903
unsigned long flags;
892-
unsigned int ind;
893904
__le32 *srq_idx;
894905
int ret = 0;
895-
int wqe_idx;
896-
u32 max_sge;
897906
void *wqe;
898-
int nreq;
899-
int i;
900907

901908
spin_lock_irqsave(&srq->lock, flags);
902909

@@ -919,26 +926,14 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
919926
break;
920927
}
921928

922-
wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
923-
if (unlikely(wqe_idx < 0)) {
924-
ret = -ENOMEM;
929+
ret = get_srq_wqe_idx(srq, &wqe_idx);
930+
if (unlikely(ret)) {
925931
*bad_wr = wr;
926932
break;
927933
}
928934

929-
wqe = get_srq_wqe(srq, wqe_idx);
930-
dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
931-
932-
for (i = 0; i < wr->num_sge; ++i) {
933-
set_data_seg_v2(dseg, wr->sg_list + i);
934-
dseg++;
935-
}
936-
937-
if (srq->rsv_sge) {
938-
dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
939-
dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
940-
dseg[i].addr = 0;
941-
}
935+
wqe = get_srq_wqe_buf(srq, wqe_idx);
936+
fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
942937

943938
srq_idx = get_idx_buf(&srq->idx_que, ind);
944939
*srq_idx = cpu_to_le32(wqe_idx);

0 commit comments

Comments
 (0)