3636
3737int
3838ompi_coll_base_bcast_intra_generic ( void * buffer ,
39- int count ,
40- struct ompi_datatype_t * datatype ,
41- int root ,
42- struct ompi_communicator_t * comm ,
43- mca_coll_base_module_t * module ,
44- size_t segment_size ,
45- ompi_coll_tree_t * tree )
46- {
47- int err = 0 , line , i , rank , segindex , req_index ;
48- int num_segments ; /* Number of segments */
49- int sendcount ; /* number of elements sent in this segment */
50- size_t realsegsize , type_size ;
51- char * tmpbuf ;
52- ptrdiff_t extent , lb ;
53- ompi_request_t * recv_reqs [2 ] = {MPI_REQUEST_NULL , MPI_REQUEST_NULL };
54- ompi_request_t * * send_reqs = NULL ;
55-
56- #if OPAL_ENABLE_DEBUG
57- int size ;
58- size = ompi_comm_size (comm );
59- assert ( size > 1 );
60- #endif
61- rank = ompi_comm_rank (comm );
62-
63- ompi_datatype_get_extent (datatype , & lb , & extent );
64- ompi_datatype_type_size ( datatype , & type_size );
65- num_segments = (original_count + count_by_segment - 1 ) / count_by_segment ;
66- realsegsize = (ptrdiff_t )count_by_segment * extent ;
67-
68- /* Set the buffer pointers */
69- tmpbuf = (char * ) buffer ;
70-
71- if ( tree -> tree_nextsize != 0 ) {
72- send_reqs = ompi_coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
73- if ( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto error_hndl ; }
74- }
75-
76- /* Root code */
77- if ( rank == root ) {
78- /*
79- For each segment:
80- - send segment to all children.
81- The last segment may have less elements than other segments.
82- */
83- sendcount = count_by_segment ;
84- for ( segindex = 0 ; segindex < num_segments ; segindex ++ ) {
85- if ( segindex == (num_segments - 1 ) ) {
86- sendcount = original_count - segindex * count_by_segment ;
87- }
88- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
89- err = MCA_PML_CALL (isend (tmpbuf , sendcount , datatype ,
90- tree -> tree_next [i ],
91- MCA_COLL_BASE_TAG_BCAST ,
92- MCA_PML_BASE_SEND_STANDARD , comm ,
93- & send_reqs [i ]));
94- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
95- }
96-
97- /* complete the sends before starting the next sends */
98- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
99- MPI_STATUSES_IGNORE );
100- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
101-
102- /* update tmp buffer */
103- tmpbuf += realsegsize ;
104-
105- }
106- }
107-
108- /* Intermediate nodes code */
109- else if ( tree -> tree_nextsize > 0 ) {
110- /*
111- Create the pipeline.
112- 1) Post the first receive
113- 2) For segments 1 .. num_segments
114- - post new receive
115- - wait on the previous receive to complete
116- - send this data to children
117- 3) Wait on the last segment
118- 4) Compute number of elements in last segment.
119- 5) Send the last segment to children
120- */
121- req_index = 0 ;
122- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
123- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
124- comm , & recv_reqs [req_index ]));
125- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
126-
127- for ( segindex = 1 ; segindex < num_segments ; segindex ++ ) {
128-
129- req_index = req_index ^ 0x1 ;
130-
131- /* post new irecv */
132- err = MCA_PML_CALL (irecv ( tmpbuf + realsegsize , count_by_segment ,
133- datatype , tree -> tree_prev ,
134- MCA_COLL_BASE_TAG_BCAST ,
135- comm , & recv_reqs [req_index ]));
136- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
137-
138- /* wait for and forward the previous segment to children */
139- err = ompi_request_wait ( & recv_reqs [req_index ^ 0x1 ],
140- MPI_STATUS_IGNORE );
141- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
142-
143- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
144- err = MCA_PML_CALL (isend (tmpbuf , count_by_segment , datatype ,
145- tree -> tree_next [i ],
146- MCA_COLL_BASE_TAG_BCAST ,
147- MCA_PML_BASE_SEND_STANDARD , comm ,
148- & send_reqs [i ]));
149- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
150- }
151-
152- /* complete the sends before starting the next iteration */
153- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
154- MPI_STATUSES_IGNORE );
155- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
156-
157- /* Update the receive buffer */
158- tmpbuf += realsegsize ;
159-
160- }
161-
162- /* Process the last segment */
163- err = ompi_request_wait ( & recv_reqs [req_index ], MPI_STATUS_IGNORE );
164- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
165- sendcount = original_count - (ptrdiff_t )(num_segments - 1 ) * count_by_segment ;
166- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
167- err = MCA_PML_CALL (isend (tmpbuf , sendcount , datatype ,
168- tree -> tree_next [i ],
169- MCA_COLL_BASE_TAG_BCAST ,
170- MCA_PML_BASE_SEND_STANDARD , comm ,
171- & send_reqs [i ]));
172- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
173- }
174-
175- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
176- MPI_STATUSES_IGNORE );
177- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
178- }
179-
180- /* Leaf nodes */
181- else {
182- /*
183- Receive all segments from parent in a loop:
184- 1) post irecv for the first segment
185- 2) for segments 1 .. num_segments
186- - post irecv for the next segment
187- - wait on the previous segment to arrive
188- 3) wait for the last segment
189- */
190- req_index = 0 ;
191- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
192- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
193- comm , & recv_reqs [req_index ]));
194- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
195-
196- for ( segindex = 1 ; segindex < num_segments ; segindex ++ ) {
197- req_index = req_index ^ 0x1 ;
198- tmpbuf += realsegsize ;
199- /* post receive for the next segment */
200- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
201- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
202- comm , & recv_reqs [req_index ]));
203- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
204- /* wait on the previous segment */
205- err = ompi_request_wait ( & recv_reqs [req_index ^ 0x1 ],
206- MPI_STATUS_IGNORE );
207- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
208- }
209-
210- err = ompi_request_wait ( & recv_reqs [req_index ], MPI_STATUS_IGNORE );
211- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
212- }
213-
214- return (MPI_SUCCESS );
215-
216- error_hndl :
217- OPAL_OUTPUT ( (ompi_coll_base_framework .framework_output ,"%s:%4d\tError occurred %d, rank %2d" ,
218- __FILE__ , line , err , rank ) );
219- (void )line ; // silence compiler warnings
220- ompi_coll_base_free_reqs ( recv_reqs , 2 );
221- if ( NULL != send_reqs ) {
222- ompi_coll_base_free_reqs (send_reqs , tree -> tree_nextsize );
223- }
224-
225- return err ;
226- }
227-
228- int
229- ompi_coll_base_bcast_intra_generic2 ( void * buffer ,
23039 int count ,
23140 struct ompi_datatype_t * datatype ,
23241 int root ,
@@ -252,7 +61,7 @@ ompi_coll_base_bcast_intra_generic2( void* buffer,
25261 rank = ompi_comm_rank (comm );
25362
25463 if ( tree -> tree_nextsize != 0 ) {
255- send_reqs = coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
64+ send_reqs = ompi_coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
25665 if ( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto error_hndl ; }
25766 }
25867
@@ -793,10 +602,11 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
793602 offsets [0 ] += sizes [0 ];
794603 }
795604 opal_convertor_set_position (& recv_convertors [0 ], & offsets [0 ]);
605+
796606 offsets [0 ] += sizes [lr ] - remainings [lr ];
797607 offsets [1 ] = offsets [0 ] + segsize ;
798- if (offsets [1 ] > sizes [lr ]) {
799- offsets [1 ] = sizes [lr ];
608+ if (offsets [1 ] > sizes [lr ] + ( lr ? sizes [ 0 ]: 0 ) ) {
609+ offsets [1 ] = sizes [lr ] + ( lr ? sizes [ 0 ]: 0 ) ;
800610 }
801611 opal_convertor_set_position (& recv_convertors [1 ], & offsets [1 ]);
802612 if (offsets [1 ] == offsets [0 ]) {
0 commit comments