Skip to content

Commit 79aef93

Browse files
Ralph Castain authored
Merge pull request #4371 from rhc54/topic/xvr
Updates to support cross-version operations with OMPI v2.x
2 parents defe739 + a63904d commit 79aef93

File tree

7 files changed, with 60 additions (+60) and 27 deletions (-27).

opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/bfrop_v12.c

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -421,7 +421,11 @@ int pmix12_v2_to_v1_datatype(pmix_data_type_t v2type)
421421
v1type = 6;
422422
break;
423423

424-
case 22:
424+
case 39:
425+
/* data arrays must be converted to info arrays */
426+
v1type = 22;
427+
break;
428+
425429
case 23:
426430
case 24:
427431
case 25:
@@ -494,6 +498,10 @@ pmix_status_t pmix12_bfrop_get_data_type(pmix_buffer_t *buffer, pmix_data_type_t
494498
pmix_status_t rc;
495499

496500
rc = pmix12_bfrop_unpack_datatype(buffer, &v1type, &n, PMIX_INT);
501+
if (UINT16_MAX < v1type) {
502+
*type = 0;
503+
return PMIX_ERR_UNKNOWN_DATA_TYPE;
504+
}
497505
if (PMIX_SUCCESS == rc) {
498506
*type = pmix12_v1_to_v2_datatype(v1type);
499507
}

opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/pack.c

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,9 @@ pmix_status_t pmix12_bfrop_pack_buffer(pmix_buffer_t *buffer,
8686
case PMIX_PERSIST:
8787
v1type = PMIX_INT;
8888
break;
89+
case PMIX_INFO_ARRAY:
90+
v1type = 22;
91+
break;
8992
default:
9093
v1type = type;
9194
}
@@ -96,6 +99,11 @@ pmix_status_t pmix12_bfrop_pack_buffer(pmix_buffer_t *buffer,
9699
return rc;
97100
}
98101
}
102+
/* if it is an info array, we have to set the type back
103+
* so the pack routine will get the correct function */
104+
if (PMIX_INFO_ARRAY == type) {
105+
v1type = PMIX_INFO_ARRAY;
106+
}
99107

100108
/* Lookup the pack function for this type and call it */
101109

@@ -436,6 +444,7 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer,
436444
{
437445
pmix_status_t ret;
438446
pmix_info_array_t array;
447+
int rank;
439448

440449
switch (p->type) {
441450
case PMIX_BOOL:
@@ -529,7 +538,7 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer,
529538
}
530539
break;
531540
case PMIX_INFO_ARRAY:
532-
if (PMIX_SUCCESS != (ret = pmix12_bfrop_pack_buffer(buffer, p->data.array, 1, PMIX_INFO_ARRAY))) {
541+
if (PMIX_SUCCESS != (ret = pmix12_bfrop_pack_buffer(buffer, &p->data.array, 1, PMIX_INFO_ARRAY))) {
533542
return ret;
534543
}
535544
break;
@@ -550,6 +559,14 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer,
550559
}
551560
break;
552561

562+
case PMIX_PROC_RANK:
563+
/* must convert this to an int */
564+
rank = p->data.rank;
565+
if (PMIX_SUCCESS != (ret = pmix12_bfrop_pack_buffer(buffer, &rank, 1, PMIX_INT))) {
566+
return ret;
567+
}
568+
break;
569+
553570
default:
554571
pmix_output(0, "PACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type);
555572
return PMIX_ERROR;

opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/unpack.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,8 @@ pmix_status_t pmix12_bfrop_unpack_buffer(pmix_buffer_t *buffer, void *dst, int32
188188
}
189189
/* if the data types don't match, then return an error */
190190
if (v1type != local_type) {
191-
pmix_output(0, "PMIX bfrop:unpack: got type %d when expecting type %d", local_type, v1type);
191+
pmix_output_verbose(1, pmix_bfrops_base_framework.framework_output,
192+
"PMIX bfrop:unpack: got type %d when expecting type %d", local_type, v1type);
192193
return PMIX_ERR_PACK_MISMATCH;
193194
}
194195
}

opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1157,7 +1157,6 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace,
11571157
pmix_buffer_t pbkt;
11581158
pmix_proc_t proc;
11591159
pmix_kval_t *kv;
1160-
pmix_peer_t *peer;
11611160

11621161
pmix_output_verbose(2, pmix_gds_base_framework.framework_output,
11631162
"[%s:%d] gds:hash:store_modex for nspace %s",
@@ -1184,26 +1183,15 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace,
11841183
* REMOTE/GLOBAL data. The byte object contains
11851184
* the rank followed by pmix_kval_t's. The list of callbacks
11861185
* contains all local participants. */
1187-
peer = NULL;
1188-
PMIX_LIST_FOREACH(scd, cbs, pmix_server_caddy_t) {
1189-
if (scd->peer->nptr == ns) {
1190-
peer = scd->peer;
1191-
break;
1192-
}
1193-
}
1194-
if (NULL == peer) {
1195-
/* we can ignore this one */
1196-
return PMIX_SUCCESS;
1197-
}
11981186

11991187
/* setup the byte object for unpacking */
12001188
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
12011189
/* the next step unfortunately NULLs the byte object's
12021190
* entries, so we need to ensure we restore them! */
1203-
PMIX_LOAD_BUFFER(peer, &pbkt, bo->bytes, bo->size);
1191+
PMIX_LOAD_BUFFER(pmix_globals.mypeer, &pbkt, bo->bytes, bo->size);
12041192
/* unload the proc that provided this data */
12051193
cnt = 1;
1206-
PMIX_BFROPS_UNPACK(rc, peer, &pbkt, &proc, &cnt, PMIX_PROC);
1194+
PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, &proc, &cnt, PMIX_PROC);
12071195
if (PMIX_SUCCESS != rc) {
12081196
PMIX_ERROR_LOG(rc);
12091197
bo->bytes = pbkt.base_ptr;
@@ -1215,7 +1203,7 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace,
12151203
/* unpack the remaining values until we hit the end of the buffer */
12161204
cnt = 1;
12171205
kv = PMIX_NEW(pmix_kval_t);
1218-
PMIX_BFROPS_UNPACK(rc, peer, &pbkt, kv, &cnt, PMIX_KVAL);
1206+
PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL);
12191207
while (PMIX_SUCCESS == rc) {
12201208
/* store this in the hash table */
12211209
if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) {
@@ -1230,7 +1218,7 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace,
12301218
/* continue along */
12311219
kv = PMIX_NEW(pmix_kval_t);
12321220
cnt = 1;
1233-
PMIX_BFROPS_UNPACK(rc, peer, &pbkt, kv, &cnt, PMIX_KVAL);
1221+
PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL);
12341222
}
12351223
PMIX_RELEASE(kv); // maintain accounting
12361224
if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {

opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -563,6 +563,16 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata)
563563
* each participant. */
564564
peer = pmix_globals.mypeer;
565565
} else {
566+
/* in some error situations, the list of local callbacks can
567+
* be empty - if that happens, we just need to call the fence
568+
* function to prevent others from hanging */
569+
if (0 == pmix_list_get_size(&trk->local_cbs)) {
570+
pmix_host_server.fence_nb(trk->pcs, trk->npcs,
571+
trk->info, trk->ninfo,
572+
data, sz, trk->modexcbfunc, trk);
573+
PMIX_RELEASE(tcd);
574+
return;
575+
}
566576
/* since all procs are the same, just use the first proc's module */
567577
cd = (pmix_server_caddy_t*)pmix_list_get_first(&trk->local_cbs);
568578
peer = cd->peer;
@@ -1676,8 +1686,6 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata)
16761686
}
16771687
if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) {
16781688
rc = PMIX_SUCCESS;
1679-
} else if (PMIX_SUCCESS != rc) {
1680-
PMIX_ERROR_LOG(rc);
16811689
}
16821690

16831691
finish_collective:

orte/orted/pmix/pmix_server.c

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,14 @@ void pmix_server_register_params(void)
148148
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
149149
&orte_pmix_server_globals.wait_for_server);
150150

151+
/* whether or not to support legacy usock connections as well as tcp */
152+
orte_pmix_server_globals.legacy = false;
153+
(void) mca_base_var_register ("orte", "pmix", NULL, "server_usock_connections",
154+
"Whether or not to support legacy usock connections",
155+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
156+
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
157+
&orte_pmix_server_globals.legacy);
158+
151159
/* whether or not to drop a session-level tool rendezvous point */
152160
orte_pmix_server_globals.session_server = false;
153161
(void) mca_base_var_register ("orte", "pmix", NULL, "session_server",
@@ -250,12 +258,14 @@ int pmix_server_init(void)
250258
kv->type = OPAL_STRING;
251259
kv->data.string = opal_os_path(false, orte_process_info.jobfam_session_dir, NULL);
252260
opal_list_append(&info, &kv->super);
253-
/* use only one listener */
254-
kv = OBJ_NEW(opal_value_t);
255-
kv->key = strdup(OPAL_PMIX_SINGLE_LISTENER);
256-
kv->type = OPAL_BOOL;
257-
kv->data.flag = true;
258-
opal_list_append(&info, &kv->super);
261+
if (!orte_pmix_server_globals.legacy) {
262+
/* use only one listener */
263+
kv = OBJ_NEW(opal_value_t);
264+
kv->key = strdup(OPAL_PMIX_SINGLE_LISTENER);
265+
kv->type = OPAL_BOOL;
266+
kv->data.flag = true;
267+
opal_list_append(&info, &kv->super);
268+
}
259269
/* tell the server to use its own internal monitoring */
260270
kv = OBJ_NEW(opal_value_t);
261271
kv->key = strdup(OPAL_PMIX_SERVER_ENABLE_MONITORING);

orte/orted/pmix/pmix_server_internal.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,7 @@ typedef struct {
265265
bool pubsub_init;
266266
bool session_server;
267267
bool system_server;
268+
bool legacy;
268269
} pmix_server_globals_t;
269270

270271
extern pmix_server_globals_t orte_pmix_server_globals;

0 commit comments

Comments
 (0)