@@ -22,12 +22,12 @@ typedef union mca_btl_vader_fbox_hdr_t {
2222 * in multiple instructions. To ensure that seq is never loaded before tag
2323 * and the tag is never read before seq put them in the same 32-bits of the
2424 * header. */
25+ /** message size */
26+ uint32_t size ;
2527 /** message tag */
2628 uint16_t tag ;
2729 /** sequence number */
2830 uint16_t seq ;
29- /** message size */
30- uint32_t size ;
3131 } data ;
3232 uint64_t ival ;
3333} mca_btl_vader_fbox_hdr_t ;
@@ -52,20 +52,24 @@ static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr,
5252{
5353 mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag , .seq = seq , .size = size }};
5454 hdr -> ival = tmp .ival ;
55+ opal_atomic_wmb ();
5556}
5657
5758/* attempt to reserve a contiguous segment from the remote ep and, on success, copy the message into it */
58- static inline unsigned char * mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t * ep , size_t size )
59+ static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t * ep , unsigned char tag ,
60+ void * restrict header , const size_t header_size ,
61+ void * restrict payload , const size_t payload_size )
5962{
6063 const unsigned int fbox_size = mca_btl_vader_component .fbox_size ;
64+ size_t size = header_size + payload_size ;
6165 unsigned int start , end , buffer_free ;
6266 size_t data_size = size ;
63- unsigned char * dst ;
67+ unsigned char * dst , * data ;
6468 bool hbs , hbm ;
6569
6670 /* don't try to use the per-peer buffer for messages that will fill up more than 25% of the buffer */
6771 if (OPAL_UNLIKELY (NULL == ep -> fbox_out .buffer || size > (fbox_size >> 2 ))) {
68- return NULL ;
72+ return false ;
6973 }
7074
7175 OPAL_THREAD_LOCK (& ep -> lock );
@@ -119,15 +123,23 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
119123 ep -> fbox_out .end = (hbs << 31 ) | end ;
120124 opal_atomic_wmb ();
121125 OPAL_THREAD_UNLOCK (& ep -> lock );
122- return NULL ;
126+ return false ;
123127 }
124128 }
125129
126130 BTL_VERBOSE (("writing fragment of size %u to offset %u {start: 0x%x, end: 0x%x (hbs: %d)} of peer's buffer. free = %u" ,
127131 (unsigned int ) size , end , start , end , hbs , buffer_free ));
128132
133+ data = dst + sizeof (mca_btl_vader_fbox_hdr_t );
134+
135+ memcpy (data , header , header_size );
136+ if (payload ) {
137+ /* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
138+ memcpy (data + header_size , payload , payload_size );
139+ }
140+
129141 /* write out the complete header (tag, sequence number, and size) now that the message data has been copied above */
130- mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR (dst ), 0 , ep -> fbox_out .seq ++ , data_size );
142+ mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR (dst ), tag , ep -> fbox_out .seq ++ , data_size );
131143
132144 end += size ;
133145
@@ -145,40 +157,6 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
145157 opal_atomic_wmb ();
146158 OPAL_THREAD_UNLOCK (& ep -> lock );
147159
148- return dst + sizeof (mca_btl_vader_fbox_hdr_t );
149- }
150-
151- static inline void mca_btl_vader_fbox_send (unsigned char * restrict fbox , unsigned char tag )
152- {
153- /* ensure data writes have completed before we mark the data as available */
154- opal_atomic_wmb ();
155-
156- /* the header proceeds the fbox buffer */
157- MCA_BTL_VADER_FBOX_HDR ((intptr_t ) fbox )[-1 ].data .tag = tag ;
158- }
159-
160- static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t * ep , unsigned char tag ,
161- void * restrict header , const size_t header_size ,
162- void * restrict payload , const size_t payload_size )
163- {
164- const size_t total_size = header_size + payload_size ;
165- unsigned char * restrict fbox ;
166-
167- fbox = mca_btl_vader_reserve_fbox (ep , total_size );
168- if (OPAL_UNLIKELY (NULL == fbox )) {
169- return false;
170- }
171-
172- memcpy (fbox , header , header_size );
173- if (payload ) {
174- /* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
175- memcpy (fbox + header_size , payload , payload_size );
176- }
177-
178- /* mark the fbox as sent */
179- mca_btl_vader_fbox_send (fbox , tag );
180-
181- /* send complete */
182160 return true;
183161}
184162
0 commit comments