Skip to content

Commit 0bf06de

Browse files
committed
group|comm: add initial support for group sentinel values
This commit modifies ompi's process list group object to support a sentinel value for non-existent ompi_proc_t objects. The sentinel was chosen to be the negative of the opal_process_name_t of the associated ompi_proc_t. This takes advantage of the fact that on most (all?) systems the top bit of a user-space pointer is never set. If this changes then a new sentinel will be needed. In addition, this commit modifies the way ompi_mpi_comm_world is initialized to fill in the group with sentinel values if the number of processes exceeds the new add_procs behavior cutoff. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 408da16 commit 0bf06de

File tree

6 files changed

+82
-68
lines changed

6 files changed

+82
-68
lines changed

ompi/communicator/comm_cid.c

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -139,54 +139,8 @@ static opal_mutex_t ompi_cid_lock;
139139
static opal_list_t ompi_registered_comms;
140140

141141

142-
/* This variable is zero (false) if all processes in MPI_COMM_WORLD
143-
* did not require MPI_THREAD_MULTIPLE support, and is 1 (true) as
144-
* soon as at least one process requested support for THREAD_MULTIPLE */
145-
static int ompi_comm_world_thread_level_mult=0;
146-
147-
148142
int ompi_comm_cid_init (void)
149143
{
150-
#if OMPI_ENABLE_THREAD_MULTIPLE
151-
ompi_proc_t **procs, *thisproc;
152-
uint8_t thread_level;
153-
uint8_t *tlpointer;
154-
int ret;
155-
size_t i, size, numprocs;
156-
157-
/** Note that the following call only returns processes
158-
* with the same jobid. This is on purpose, since
159-
* we switch for the dynamic communicators anyway
160-
* to the original (slower) cid allocation algorithm.
161-
*/
162-
procs = ompi_proc_world ( &numprocs );
163-
164-
for ( i=0; i<numprocs; i++ ) {
165-
thisproc = procs[i];
166-
167-
OPAL_MODEX_RECV_STRING(ret, "MPI_THREAD_LEVEL",
168-
&thisproc->super.proc_name,
169-
(uint8_t**)&tlpointer, &size);
170-
if (OMPI_SUCCESS == ret) {
171-
thread_level = *((uint8_t *) tlpointer);
172-
if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) {
173-
ompi_comm_world_thread_level_mult = 1;
174-
break;
175-
}
176-
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
177-
if (ompi_mpi_thread_multiple) {
178-
ompi_comm_world_thread_level_mult = 1;
179-
}
180-
break;
181-
} else {
182-
return ret;
183-
}
184-
}
185-
free(procs);
186-
#else
187-
ompi_comm_world_thread_level_mult = 0; // silence compiler warning if not used
188-
#endif
189-
190144
return OMPI_SUCCESS;
191145
}
192146

ompi/communicator/comm_init.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2006-2010 University of Houston. All rights reserved.
1414
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
16-
* Copyright (c) 2012-2014 Los Alamos National Security, LLC.
16+
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
1717
* All rights reserved.
1818
* Copyright (c) 2011-2013 Inria. All rights reserved.
1919
* Copyright (c) 2011-2013 Universite Bordeaux 1
@@ -102,12 +102,25 @@ int ompi_comm_init(void)
102102
OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t);
103103
assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0);
104104
group = OBJ_NEW(ompi_group_t);
105-
group->grp_proc_pointers = ompi_proc_world(&size);
106-
group->grp_proc_count = (int)size;
105+
106+
size = ompi_process_info.num_procs;
107+
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
108+
group->grp_proc_count = size;
109+
110+
for (size_t i = 0 ; i < size ; ++i) {
111+
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
112+
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
113+
if (NULL == group->grp_proc_pointers[i]) {
114+
/* set sentinel value */
115+
group->grp_proc_pointers[i] = (ompi_proc_t *)(-*((intptr_t *) &name));
116+
} else {
117+
OBJ_RETAIN (ompi_proc_local_proc);
118+
}
119+
}
120+
107121
OMPI_GROUP_SET_INTRINSIC (group);
108122
OMPI_GROUP_SET_DENSE (group);
109123
ompi_set_group_rank(group, ompi_proc_local());
110-
ompi_group_increment_proc_count (group);
111124

112125
ompi_mpi_comm_world.comm.c_contextid = 0;
113126
ompi_mpi_comm_world.comm.c_id_start_index = 4;

ompi/group/group.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,3 +568,23 @@ int ompi_group_compare(ompi_group_t *group1,
568568

569569
return return_value;
570570
}
571+
572+
/**
 * Check whether a group contains at least one peer that is not on the
 * local node.
 *
 * @param group  group whose process list is scanned
 *
 * @return true as soon as one member is found that is either still stored
 *         as a sentinel (negative pointer value, dense storage only) or is
 *         not flagged as being on the local node; false if every member is
 *         local, which includes the case of an empty group.
 */
bool ompi_group_have_remote_peers (ompi_group_t *group)
{
    /* grp_proc_count is indexed with int everywhere else in the group code;
     * an int index avoids a signed/unsigned comparison here */
    for (int i = 0 ; i < group->grp_proc_count ; ++i) {
        ompi_proc_t *proc = NULL;
#if OMPI_GROUP_SPARSE
        proc = ompi_group_peer_lookup (group, i);
#else
        /* a negative pointer value is a sentinel standing in for a proc
         * that has no ompi_proc_t yet; it is reported as remote without
         * being resolved.
         * NOTE(review): presumably local-node peers always have a real
         * ompi_proc_t by this point -- confirm against ompi_proc_init */
        if ((intptr_t) group->grp_proc_pointers[i] < 0) {
            return true;
        }
        proc = group->grp_proc_pointers[i];
#endif
        if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            return true;
        }
    }

    return false;
}

ompi/group/group.h

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
1616
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
17-
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
17+
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
1818
* reserved.
1919
* $COPYRIGHT$
2020
*
@@ -339,10 +339,38 @@ static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, in
339339
#if OMPI_GROUP_SPARSE
340340
return ompi_group_get_proc_ptr (group, peer_id);
341341
#else
342+
if (OPAL_UNLIKELY((intptr_t) group->grp_proc_pointers[peer_id] < 0)) {
343+
intptr_t sentinel = -(intptr_t) group->grp_proc_pointers[peer_id];
344+
/* replace sentinel value with an actual ompi_proc_t */
345+
group->grp_proc_pointers[peer_id] =
346+
(ompi_proc_t *) ompi_proc_for_name (*((opal_process_name_t *) &sentinel));
347+
OBJ_RETAIN(group->grp_proc_pointers[peer_id]);
348+
}
342349
return group->grp_proc_pointers[peer_id];
343350
#endif
344351
}
345352

353+
/**
 * Look up the proc stored at a given index of a group without creating it.
 *
 * Unlike ompi_group_peer_lookup(), a sentinel entry is left untouched and
 * reported as NULL instead of being replaced by a real ompi_proc_t.
 *
 * @param group    group to search
 * @param peer_id  index of the peer within the group
 *
 * @return the stored ompi_proc_t pointer, or NULL if the entry is a
 *         sentinel (dense storage) or, in debug builds, if peer_id is
 *         outside [0, grp_proc_count).
 */
static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t *group, int peer_id)
{
#if OPAL_ENABLE_DEBUG
    /* reject negative indices as well as indices past the end; both would
     * otherwise read outside grp_proc_pointers */
    if (peer_id < 0 || peer_id >= group->grp_proc_count) {
        opal_output(0, "ompi_group_peer_lookup_existing: invalid peer index (%d)", peer_id);
        return (struct ompi_proc_t *) NULL;
    }
#endif
#if OMPI_GROUP_SPARSE
    return ompi_group_get_proc_ptr (group, peer_id);
#else
    /* a negative pointer value is a sentinel, not a usable ompi_proc_t */
    if (OPAL_UNLIKELY((intptr_t) group->grp_proc_pointers[peer_id] < 0)) {
        return NULL;
    }

    return group->grp_proc_pointers[peer_id];
#endif
}
371+
372+
bool ompi_group_have_remote_peers (ompi_group_t *group);
373+
346374
/**
347375
* Function to print the group info
348376
*/

ompi/group/group_init.c

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -210,14 +210,13 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size)
210210
*/
211211
void ompi_group_increment_proc_count(ompi_group_t *group)
212212
{
213-
int proc;
214213
ompi_proc_t * proc_pointer;
215-
for (proc = 0; proc < group->grp_proc_count; proc++) {
216-
proc_pointer = ompi_group_peer_lookup(group,proc);
217-
OBJ_RETAIN(proc_pointer);
214+
for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
215+
proc_pointer = ompi_group_peer_lookup_existing (group, proc);
216+
if (proc_pointer) {
217+
OBJ_RETAIN(proc_pointer);
218+
}
218219
}
219-
220-
return;
221220
}
222221

223222
/*
@@ -226,14 +225,13 @@ void ompi_group_increment_proc_count(ompi_group_t *group)
226225

227226
void ompi_group_decrement_proc_count(ompi_group_t *group)
228227
{
229-
int proc;
230228
ompi_proc_t * proc_pointer;
231-
for (proc = 0; proc < group->grp_proc_count; proc++) {
232-
proc_pointer = ompi_group_peer_lookup(group,proc);
233-
OBJ_RELEASE(proc_pointer);
229+
for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
230+
proc_pointer = ompi_group_peer_lookup_existing (group, proc);
231+
if (proc_pointer) {
232+
OBJ_RELEASE(proc_pointer);
233+
}
234234
}
235-
236-
return;
237235
}
238236

239237
/*

ompi/group/group_set_rank.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -10,6 +11,8 @@
1011
* Copyright (c) 2004-2005 The Regents of the University of California.
1112
* All rights reserved.
1213
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
14+
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
15+
* reserved.
1316
* $COPYRIGHT$
1417
*
1518
* Additional copyrights may follow
@@ -38,12 +41,10 @@ void ompi_set_group_rank(ompi_group_t *group, struct ompi_proc_t *proc_pointer)
3841
for (proc = 0; proc < group->grp_proc_count; proc++) {
3942
/* check and see if this proc pointer matches proc_pointer
4043
*/
41-
if (ompi_group_peer_lookup(group,proc) == proc_pointer) {
44+
if (ompi_group_peer_lookup_existing (group, proc) == proc_pointer) {
4245
group->grp_my_rank = proc;
43-
}
46+
break;
47+
}
4448
} /* end proc loop */
4549
}
46-
47-
/* return */
48-
return;
4950
}

0 commit comments

Comments
 (0)