@@ -1133,15 +1133,16 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order,
11331133
11341134/* check if pending fragment has enough space for coalescing */
11351135static mca_btl_openib_send_frag_t * check_coalescing (opal_list_t * frag_list ,
1136- opal_mutex_t * lock , mca_btl_base_endpoint_t * ep , size_t size )
1136+ opal_mutex_t * lock , struct mca_btl_base_endpoint_t * ep , size_t size ,
1137+ mca_btl_openib_coalesced_frag_t * * cfrag )
11371138{
11381139 mca_btl_openib_send_frag_t * frag = NULL ;
11391140
1140- if (opal_list_is_empty (frag_list ))
1141+ if (opal_list_is_empty (frag_list ))
11411142 return NULL ;
11421143
11431144 OPAL_THREAD_LOCK (lock );
1144- if (!opal_list_is_empty (frag_list )) {
1145+ if (!opal_list_is_empty (frag_list )) {
11451146 int qp ;
11461147 size_t total_length ;
11471148 opal_list_item_t * i = opal_list_get_first (frag_list );
@@ -1158,10 +1159,20 @@ static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
11581159
11591160 qp = to_base_frag (frag )-> base .order ;
11601161
1161- if (total_length <= mca_btl_openib_component .qp_infos [qp ].size )
1162- opal_list_remove_first (frag_list );
1163- else
1162+ if (total_length <= mca_btl_openib_component .qp_infos [qp ].size ) {
1163+ /* make sure we can allocate a coalescing frag before returning success */
1164+ * cfrag = alloc_coalesced_frag ();
1165+ if (OPAL_LIKELY (NULL != cfrag )) {
1166+ (* cfrag )-> send_frag = frag ;
1167+ (* cfrag )-> sent = false;
1168+
1169+ opal_list_remove_first (frag_list );
1170+ } else {
1171+ frag = NULL ;
1172+ }
1173+ } else {
11641174 frag = NULL ;
1175+ }
11651176 }
11661177 OPAL_THREAD_UNLOCK (lock );
11671178
@@ -1188,7 +1199,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
11881199 mca_btl_openib_module_t * obtl = (mca_btl_openib_module_t * )btl ;
11891200 int qp = frag_size_to_order (obtl , size );
11901201 mca_btl_openib_send_frag_t * sfrag = NULL ;
1191- mca_btl_openib_coalesced_frag_t * cfrag ;
1202+ mca_btl_openib_coalesced_frag_t * cfrag = NULL ;
11921203
11931204 assert (qp != MCA_BTL_NO_ORDER );
11941205
@@ -1197,26 +1208,25 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
11971208 int prio = !(flags & MCA_BTL_DES_FLAGS_PRIORITY );
11981209
11991210 sfrag = check_coalescing (& ep -> qps [qp ].no_wqe_pending_frags [prio ],
1200- & ep -> endpoint_lock , ep , size );
1211+ & ep -> endpoint_lock , ep , size , & cfrag );
12011212
1202- if (NULL == sfrag ) {
1213+ if (NULL == sfrag ) {
12031214 if (BTL_OPENIB_QP_TYPE_PP (qp )) {
12041215 sfrag = check_coalescing (& ep -> qps [qp ].no_credits_pending_frags [prio ],
1205- & ep -> endpoint_lock , ep , size );
1216+ & ep -> endpoint_lock , ep , size , & cfrag );
12061217 } else {
12071218 sfrag = check_coalescing (
12081219 & obtl -> qps [qp ].u .srq_qp .pending_frags [prio ],
1209- & obtl -> ib_lock , ep , size );
1220+ & obtl -> ib_lock , ep , size , & cfrag );
12101221 }
12111222 }
12121223 }
12131224
1214- if (NULL == sfrag )
1225+ if (NULL == sfrag ) {
12151226 return ib_frag_alloc ((mca_btl_openib_module_t * )btl , size , order , flags );
1227+ }
12161228
12171229 /* begin coalescing message */
1218- cfrag = alloc_coalesced_frag ();
1219- cfrag -> send_frag = sfrag ;
12201230
12211231 /* fix up new coalescing header if this is the first coalesced frag */
12221232 if (sfrag -> hdr != sfrag -> chdr ) {
@@ -1250,10 +1260,9 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
12501260 to_base_frag (cfrag )-> segment .base .seg_addr .pval = cfrag -> hdr + 1 ;
12511261 to_base_frag (cfrag )-> segment .base .seg_len = size ;
12521262
1253- /* save coalesced fragment on a main fragment; we will need it after send
1254- * completion to free it and to call upper layer callback */
1255- opal_list_append (& sfrag -> coalesced_frags , (opal_list_item_t * )cfrag );
1256- sfrag -> coalesced_length += (size + sizeof (mca_btl_openib_header_coalesced_t ));
1263+ /* NTH: there is no reason to append the coalesced fragment here. No more
1264+ * fragments will be added until either send or free has been called on
1265+ * the coalesced frag. */
12571266
12581267 to_base_frag (cfrag )-> base .des_flags = flags ;
12591268
@@ -1306,6 +1315,15 @@ int mca_btl_openib_free(
13061315 default :
13071316 break ;
13081317 }
1318+
1319+ if (openib_frag_type (des ) == MCA_BTL_OPENIB_FRAG_COALESCED && !to_coalesced_frag (des )-> sent ) {
1320+ mca_btl_openib_send_frag_t * sfrag = to_coalesced_frag (des )-> send_frag ;
1321+
1322+ /* the coalesced fragment would have sent the original fragment but that
1323+ * will not happen so send the fragment now */
1324+ mca_btl_openib_endpoint_send (to_com_frag (sfrag )-> endpoint , sfrag );
1325+ }
1326+
13091327 MCA_BTL_IB_FRAG_RETURN (des );
13101328
13111329 return OPAL_SUCCESS ;
@@ -1888,11 +1906,19 @@ int mca_btl_openib_send(
18881906 openib_frag_type (des ) == MCA_BTL_OPENIB_FRAG_COALESCED );
18891907
18901908 if (openib_frag_type (des ) == MCA_BTL_OPENIB_FRAG_COALESCED ) {
1909+ frag = to_coalesced_frag (des )-> send_frag ;
1910+
1911+ /* save coalesced fragment on a main fragment; we will need it after send
1912+ * completion to free it and to call upper layer callback */
1913+ opal_list_append (& frag -> coalesced_frags , (opal_list_item_t * ) des );
1914+ frag -> coalesced_length += to_coalesced_frag (des )-> hdr -> alloc_size +
1915+ sizeof (mca_btl_openib_header_coalesced_t );
1916+
1917+ to_coalesced_frag (des )-> sent = true;
18911918 to_coalesced_frag (des )-> hdr -> tag = tag ;
18921919 to_coalesced_frag (des )-> hdr -> size = des -> des_local -> seg_len ;
18931920 if (ep -> nbo )
18941921 BTL_OPENIB_HEADER_COALESCED_HTON (* to_coalesced_frag (des )-> hdr );
1895- frag = to_coalesced_frag (des )-> send_frag ;
18961922 } else {
18971923 frag = to_send_frag (des );
18981924 to_com_frag (des )-> endpoint = ep ;
0 commit comments