diff --git a/config/ompi_setup_prrte.m4 b/config/ompi_setup_prrte.m4 index 3871440a0f5..a83bd618f71 100644 --- a/config/ompi_setup_prrte.m4 +++ b/config/ompi_setup_prrte.m4 @@ -46,6 +46,15 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ [AC_HELP_STRING([--enable-prte-prefix-by-default], [Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])]) + AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 4 && test "$enable_internal_rte" != "no"], + [AC_MSG_WARN([OMPI's internal runtime environment "PRRTE" does not support]) + AC_MSG_WARN([PMIx versions less than v4.x as they lack adequate tool]) + AC_MSG_WARN([support. You can, if desired, build OMPI against an earlier]) + AC_MSG_WARN([version of PMIx for strictly direct-launch purposes - e.g., using]) + AC_MSG_WARN([Slurm's srun to launch the job - by configuring with the]) + AC_MSG_WARN([--disable-internal-rte option.]) + AC_MSG_ERROR([Cannot continue])]) + AC_MSG_CHECKING([if RTE support is enabled]) if test "$enable_internal_rte" != "no"; then AC_MSG_RESULT([yes]) @@ -81,7 +90,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ opal_prrte_prefix_arg= fi - opal_prrte_args="--prefix=$prefix --disable-dlopen $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg" + opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg" AS_IF([test "$enable_debug" = "yes"], [opal_prrte_args="--enable-debug $opal_prrte_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index c5706f4bffe..6f18c02cd1c 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -125,6 +125,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=4x + opal_numerical_pmix_version=4 opal_external_pmix_version_found=1 
opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -139,6 +140,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=3x + opal_numerical_pmix_version=3 opal_external_pmix_version_found=1 opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -153,6 +155,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=2x + opal_numerical_pmix_version=2 opal_external_pmix_version_found=1 opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -167,6 +170,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=1x + opal_numerical_pmix_version=1 opal_external_pmix_version_found=1 opal_external_have_pmix1=1 opal_external_pmix_happy=yes], @@ -179,6 +183,12 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ opal_external_pmix_happy=no]) ]) + AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3], + [AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3]) + AC_MSG_WARN([Please direct us to a more current PMIx release or]) + AC_MSG_WARN([use the internally provided one]) + AC_MSG_ERROR([Cannot continue])]) + AS_IF([test "$opal_external_pmix_happy" = "yes"], [$3 # add the new flags to our wrapper compilers diff --git a/ompi/interlib/interlib.c b/ompi/interlib/interlib.c index 69062c5de37..da18b694199 100644 --- a/ompi/interlib/interlib.c +++ b/ompi/interlib/interlib.c @@ -121,6 +121,10 @@ int ompi_interlib_declare(int threadlevel, char *version) PMIX_INFO_DESTRUCT(&info[3]); /* account for our refcount on pmix_init */ PMIx_Finalize(NULL, 0); - ret = opal_pmix_convert_status(rc); + if (ompi_singleton && PMIX_ERR_UNREACH == rc) { + ret = OMPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + } return ret; } diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index a550e7b9f65..a42109b5de5 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ 
b/ompi/runtime/ompi_mpi_abort.c @@ -85,8 +85,8 @@ static void try_kill_peers(ompi_communicator_t *comm, procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t)); if (NULL == procs) { - /* quick clean orte and get out */ - ompi_rte_abort(errno, "Abort: unable to alloc memory to kill procs"); + /* quick clean RTE and get out */ + ompi_rte_abort(errcode, "Abort: unable to alloc memory to kill procs"); } /* put all the local group procs in the abort list */ diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index 66a2db21edb..d459024231d 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -61,7 +61,27 @@ opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1}; opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX}; hwloc_cpuset_t ompi_proc_applied_binding = NULL; -pmix_process_info_t pmix_process_info = {0}; +pmix_process_info_t pmix_process_info = { + .my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID}, + .nodename = NULL, + .pid = 0, + .top_session_dir = NULL, + .job_session_dir = NULL, + .proc_session_dir = NULL, + .my_local_rank = 0, + .my_node_rank = 0, + .num_local_peers = 0, + .num_procs = 0, + .app_num = 0, + .univ_size = 0, + .app_sizes = NULL, + .app_ldrs = NULL, + .cpuset = NULL, + .command = NULL, + .num_apps = 0, + .initial_wdir = NULL, + .reincarnation = 0 +}; bool pmix_proc_is_bound = false; bool ompi_singleton = false; diff --git a/opal/mca/pmix/pmix4x/openpmix b/opal/mca/pmix/pmix4x/openpmix index 98d14d55f8d..a18e5313829 160000 --- a/opal/mca/pmix/pmix4x/openpmix +++ b/opal/mca/pmix/pmix4x/openpmix @@ -1 +1 @@ -Subproject commit 98d14d55f8d4bd27fe6eb1e508c336702e1fbf76 +Subproject commit a18e53138298d61a01fec4471518140304539e8c diff --git a/opal/mca/rcache/grdma/rcache_grdma_module.c b/opal/mca/rcache/grdma/rcache_grdma_module.c index c4e72c00415..3f163c2d7f1 100644 --- a/opal/mca/rcache/grdma/rcache_grdma_module.c +++ b/opal/mca/rcache/grdma/rcache_grdma_module.c @@ -187,7 +187,7 
@@ static inline mca_rcache_base_registration_t *mca_rcache_grdma_remove_lru_head(m /* registration has been selected for removal and is no longer in the LRU. mark it * as such. */ new_flags = (old_flags & ~MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU) | MCA_RCACHE_FLAGS_INVALID; - if (opal_atomic_compare_exchange_strong_32(&old_reg->flags, &old_flags, new_flags)) { + if (opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t*)&old_reg->flags, &old_flags, new_flags)) { break; } } while (1); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index ab17e8e9bb7..09e10a4fd57 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -54,6 +54,7 @@ #include "opal/mca/installdirs/base/base.h" #include "opal/mca/memory/base/base.h" #include "opal/mca/patcher/base/base.h" +#include "opal/mca/pmix/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/reachable/base/base.h" @@ -630,7 +631,7 @@ opal_init_util(int* pargc, char*** pargv) static mca_base_framework_t *opal_init_frameworks[] = { &opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework, &opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework, - &opal_shmem_base_framework, &opal_reachable_base_framework, + &opal_shmem_base_framework, &opal_reachable_base_framework, &opal_pmix_base_framework, NULL, }; diff --git a/prrte b/prrte index 9add90bcfe8..cdea5231171 160000 --- a/prrte +++ b/prrte @@ -1 +1 @@ -Subproject commit 9add90bcfe88af1994914a78544d6236327be10e +Subproject commit cdea5231171b2fdea11269033de9e265fc7f3a63