Skip to content
Merged
46 changes: 46 additions & 0 deletions easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.1-GCC-11.2.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# EasyBuild easyconfig: OpenMPI 4.1.1 built with the GCC 11.2.0 toolchain.
name = 'OpenMPI'
version = '4.1.1'

homepage = 'https://www.open-mpi.org/'
description = """The Open MPI Project is an open source MPI-3 implementation."""

toolchain = {'name': 'GCC', 'version': '11.2.0'}

source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads']
sources = [SOURCELOWER_TAR_BZ2]

# Patch files for this build; the checksums list below follows the same order.
patches = [
    'OpenMPI-4.1.1_fix-bufferoverflow-in-common_ofi.patch',
    'OpenMPI-4.0.6_remove-pmix-check-in-pmi-switch.patch',
    'OpenMPI-4.1.1_opal-pmix-package-rank.patch',
    'OpenMPI-4.1.1_pmix3x-protection.patch',
]

# One checksum per file: the source tarball first, then each patch.
# Each entry is labelled with the file it belongs to (the label sits on the
# preceding line where it would not fit next to the checksum).
checksums = [
    'e24f7a778bd11a71ad0c14587a7f5b00e68a71aa5623e2157bafee3d44c07cda',  # openmpi-4.1.1.tar.bz2
    # OpenMPI-4.1.1_fix-bufferoverflow-in-common_ofi.patch
    'a189d834506f3d7c31eda6aa184598a3631ea24a94bc551d5ed1f053772ca49e',
    # OpenMPI-4.0.6_remove-pmix-check-in-pmi-switch.patch
    '8acee6c9b2b4bf12873a39b85a58ca669de78e90d26186e52f221bb4853abc4d',
    '04353672cf7be031e5306c94068d7012d99e6cd94b69d93230797ffcd7f31903',  # OpenMPI-4.1.1_opal-pmix-package-rank.patch
    '384ef9f1fa803b0d71dae2ec0748d0f20295992437532afedf21478bda164ff8',  # OpenMPI-4.1.1_pmix3x-protection.patch
]

builddependencies = [
    ('pkg-config', '0.29.2'),
]

dependencies = [
    ('zlib', '1.2.11'),
    ('hwloc', '2.5.0'),
    ('libevent', '2.1.12'),
    ('UCX', '1.11.0'),
    ('libfabric', '1.13.0'),
    ('PMIx', '4.1.0'),
]

# MPI-1 compatibility is intentionally left off for now to see what breaks;
# uncomment the next line to turn it back on.
# configopts = '--enable-mpi1-compatibility '

# Site-specific: uncomment to enable SLURM integration.
# NOTE: '+=' needs configopts to be defined already; if the MPI-1 line above
# stays commented out, use '=' here instead.
# configopts += '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'

moduleclass = 'mpi'
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
see https://github.com/open-mpi/ompi/pull/9212

From 8b07775d1002cc9be887c41768aea9ac896c5ecd Mon Sep 17 00:00:00 2001
From: Ralph Castain <[email protected]>
Date: Tue, 10 Aug 2021 08:38:15 -0700
Subject: [PATCH] Use an OPAL-prefixed abstraction for PMIX_PACKAGE_RANK

If someone configures against PMIx v4.1 or above, the configure
logic will correctly detect the presence of PMIX_PACKAGE_RANK.
However, the internal code only includes the opal/mca/pmix headers
and thus only the OPAL-prefixed PMIx abstractions are available.
Add an OPAL_PMIX_PACKAGE_RANK abstraction and update the common/ofi
code to use it.

Signed-off-by: Ralph Castain <[email protected]>
---
opal/mca/common/ofi/common_ofi.c | 6 ++----
opal/mca/pmix/pmix_types.h | 2 ++
2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c
index 12bed7eb6b8..769ecb75239 100644
--- a/opal/mca/common/ofi/common_ofi.c
+++ b/opal/mca/common/ofi/common_ofi.c
@@ -312,19 +312,17 @@ static uint32_t get_package_rank(int32_t num_local_peers, uint16_t my_local_rank
char *local_peers = NULL;
char *locality_string = NULL;
char *mylocality = NULL;
+ uint16_t *package_rank_ptr;

pname.jobid = OPAL_PROC_MY_NAME.jobid;
pname.vpid = OPAL_VPID_WILDCARD;

-#if HAVE_DECL_PMIX_PACKAGE_RANK
- uint16_t *package_rank_ptr;
// Try to get the PACKAGE_RANK from PMIx
- OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PACKAGE_RANK,
+ OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_PACKAGE_RANK,
&pname, &package_rank_ptr, OPAL_UINT16);
if (OPAL_SUCCESS == rc) {
return (uint32_t)*package_rank_ptr;
}
-#endif

// Get the local peers
OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_LOCAL_PEERS,
diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h
index d2643b2e549..816e386cbf1 100644
--- a/opal/mca/pmix/pmix_types.h
+++ b/opal/mca/pmix/pmix_types.h
@@ -2,6 +2,7 @@
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
+ * Copyright (c) 2021 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -130,6 +131,7 @@ BEGIN_C_DECLS
#define OPAL_PMIX_NPROC_OFFSET "pmix.offset" // (uint32_t) starting global rank of this job
#define OPAL_PMIX_LOCAL_RANK "pmix.lrank" // (uint16_t) rank on this node within this job
#define OPAL_PMIX_NODE_RANK "pmix.nrank" // (uint16_t) rank on this node spanning all jobs
+#define OPAL_PMIX_PACKAGE_RANK "pmix.pkgrank" // (uint16_t) rank within this job on the package where this proc resides
#define OPAL_PMIX_LOCALLDR "pmix.lldr" // (uint64_t) opal_identifier of lowest rank on this node within this job
#define OPAL_PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job
#define OPAL_PMIX_PROC_PID "pmix.ppid" // (pid_t) pid of specified proc
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
see https://github.com/open-mpi/ompi/pull/9213

From bd48a06e25c1058b7d3c3ba4414af388836c8219 Mon Sep 17 00:00:00 2001
From: Ralph Castain <[email protected]>
Date: Tue, 10 Aug 2021 13:01:11 -0700
Subject: [PATCH] Protect the pmix3x component from a PMIx v4 attribute

PMIx v4 introduced a new PMIX_TOPO2 attribute that takes a data type
unknown to PMIx 3. Unfortunately, that attribute can be provided by
the RM without our knowledge. We will update PMIx to try and detect
it and prevent it from slipping down to this level. Meantime, add
some simple protection here.

Signed-off-by: Ralph Castain <[email protected]>
---
opal/mca/pmix/pmix3x/pmix3x.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c
index 90670f6adb6..3b2a5353cc6 100644
--- a/opal/mca/pmix/pmix3x/pmix3x.c
+++ b/opal/mca/pmix/pmix3x/pmix3x.c
@@ -8,6 +8,7 @@
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
* reserved.
+ * Copyright (c) 2021 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -966,6 +967,7 @@ int pmix3x_value_unload(opal_value_t *kv,
opal_list_t *lt;
opal_value_t *ival;
size_t n;
+ pmix_info_t *iptr;

switch(v->type) {
case PMIX_UNDEF:
@@ -1178,27 +1180,30 @@ int pmix3x_value_unload(opal_value_t *kv,
kv->data.pinfo.state = pmix3x_convert_state(v->data.pinfo->state);
break;
case PMIX_DATA_ARRAY:
- if (NULL == v->data.darray || NULL == v->data.darray->array) {
+ if (NULL == v->data.darray || NULL == v->data.darray->array ||
+ PMIX_INFO != v->data.darray->type) {
kv->data.ptr = NULL;
break;
}
lt = OBJ_NEW(opal_list_t);
kv->type = OPAL_PTR;
kv->data.ptr = (void*)lt;
+ iptr = (pmix_info_t*)v->data.darray->array;
for (n=0; n < v->data.darray->size; n++) {
+ if (0 == strcmp("pmix.topo2", iptr[n].key)) {
+ /* we do not know (yet) how to convert the pmix.topo2 key from PMIx 4.0.0
+ * but since we are not going to use it, simply ignore it and move on */
+ continue;
+ }
ival = OBJ_NEW(opal_value_t);
opal_list_append(lt, &ival->super);
- /* handle the various types */
- if (PMIX_INFO == v->data.darray->type) {
- pmix_info_t *iptr = (pmix_info_t*)v->data.darray->array;
- ival->key = strdup(iptr[n].key);
- rc = pmix3x_value_unload(ival, &iptr[n].value);
- if (OPAL_SUCCESS != rc) {
- OPAL_LIST_RELEASE(lt);
- kv->type = OPAL_UNDEF;
- kv->data.ptr = NULL;
- break;
- }
+ ival->key = strdup(iptr[n].key);
+ rc = pmix3x_value_unload(ival, &iptr[n].value);
+ if (OPAL_SUCCESS != rc) {
+ OPAL_LIST_RELEASE(lt);
+ kv->type = OPAL_UNDEF;
+ kv->data.ptr = NULL;
+ break;
}
}
break;
46 changes: 46 additions & 0 deletions easybuild/easyconfigs/p/PMIx/PMIx-4.1.0-GCCcore-11.2.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# EasyBuild easyconfig: PMIx 4.1.0 built with the GCCcore 11.2.0 toolchain
# using the generic ConfigureMake easyblock.
easyblock = 'ConfigureMake'

name = 'PMIx'
version = '4.1.0'

homepage = 'https://pmix.org/'
description = """Process Management for Exascale Environments
PMI Exascale (PMIx) represents an attempt to
provide an extended version of the PMI standard specifically designed
to support clusters up to and including exascale sizes. The overall
objective of the project is not to branch the existing pseudo-standard
definitions - in fact, PMIx fully supports both of the existing PMI-1
and PMI-2 APIs - but rather to (a) augment and extend those APIs to
eliminate some current restrictions that impact scalability, and (b)
provide a reference implementation of the PMI-server that demonstrates
the desired level of scalability.
"""

toolchain = {
    'name': 'GCCcore',
    'version': '11.2.0',
}
toolchainopts = {
    'pic': True,
}

source_urls = [
    'https://github.com/openpmix/openpmix/releases/download/v%(version)s',
]
sources = [
    '%(namelower)s-%(version)s.tar.bz2',
]
checksums = [
    '145f05a6c621bfb3fc434776b615d7e6d53260cc9ba340a01f55b383e07c842e',
]

builddependencies = [
    ('binutils', '2.37'),
]

dependencies = [
    ('libevent', '2.1.12'),
    ('zlib', '1.2.11'),
    ('hwloc', '2.5.0'),
]

# Configure options, one per line; each fragment starts with a space so the
# pieces concatenate into a single space-separated option string.
# The first three point configure at the libevent, zlib and hwloc listed in
# 'dependencies' via their $EBROOT* variables.
configopts = ' --with-libevent=$EBROOTLIBEVENT'
configopts += ' --with-zlib=$EBROOTZLIB'
configopts += ' --with-hwloc=$EBROOTHWLOC'
configopts += ' --enable-pmix-binaries'
configopts += ' --disable-man-pages'

# Passed to the build command as-is.
buildopts = 'V=1'

# Files and directories that must exist in the installation afterwards.
sanity_check_paths = {
    'files': [
        'bin/pevent',
        'bin/plookup',
        'bin/pmix_info',
        'bin/pps',
    ],
    'dirs': [
        'etc',
        'include',
        'lib',
        'share',
    ],
}

moduleclass = 'lib'