Skip to content

Commit 36fd37b

Browse files
committed
sharedfp/lockedfile and sm: fix the name collision
This fixes the issue reported by Nicolas Joly on the mailing list: the sharedfp/lockedfile component does not currently support a scenario where multiple jobs read from the same input file, due to a collision of the filenames used for the sharedfp handle. Although not part of the original report, the same problem occurs for the sharedfp/sm component. Therefore, add the jobid to the lockedfile/sm file names. Fixes: #3098 Signed-off-by: Edgar Gabriel <[email protected]>
1 parent d1fed77 commit 36fd37b

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12-
* Copyright (c) 2013-2016 University of Houston. All rights reserved.
12+
* Copyright (c) 2013-2017 University of Houston. All rights reserved.
1313
* Copyright (c) 2015 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
1515
* $COPYRIGHT$
@@ -25,6 +25,8 @@
2525

2626
#include "mpi.h"
2727
#include "ompi/constants.h"
28+
#include "ompi/group/group.h"
29+
#include "ompi/proc/proc.h"
2830
#include "ompi/mca/sharedfp/sharedfp.h"
2931
#include "ompi/mca/sharedfp/base/base.h"
3032

@@ -99,8 +101,19 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
99101
return OMPI_ERR_OUT_OF_RESOURCE;
100102
}
101103

102-
lockedfilename = (char*)malloc(sizeof(char) * (strlen(filename) + 64));
103-
sprintf(lockedfilename,"%s%s",filename,".lockedfile");
104+
ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 );
105+
opal_process_name_t *mastername = &(masterproc->super.proc_name);
106+
opal_jobid_t masterjobid = mastername->jobid;
107+
108+
size_t filenamelen = strlen(filename) + 16;
109+
lockedfilename = (char*)malloc(sizeof(char) * filenamelen);
110+
if ( NULL == lockedfilename ) {
111+
free (shfileHandle);
112+
free (sh);
113+
free (module_data);
114+
return OMPI_ERR_OUT_OF_RESOURCE;
115+
}
116+
snprintf(lockedfilename, filenamelen, "%s-%u%s",filename,masterjobid,".lock");
104117
module_data->filename = lockedfilename;
105118

106119
/*-------------------------------------------------*/

ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12-
* Copyright (c) 2013-2016 University of Houston. All rights reserved.
12+
* Copyright (c) 2013-2017 University of Houston. All rights reserved.
1313
* Copyright (c) 2013 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2015 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
@@ -35,6 +35,8 @@
3535

3636
#include "mpi.h"
3737
#include "ompi/constants.h"
38+
#include "ompi/group/group.h"
39+
#include "ompi/proc/proc.h"
3840
#include "ompi/mca/sharedfp/sharedfp.h"
3941
#include "ompi/mca/sharedfp/base/base.h"
4042

@@ -139,8 +141,11 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
139141
free(shfileHandle);
140142
return OMPI_ERR_OUT_OF_RESOURCE;
141143
}
142-
sprintf(sm_filename,"/tmp/OMPIO_sharedfp_sm_%s%s",filename_basename,".sm");
144+
ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 );
145+
opal_process_name_t *mastername = &(masterproc->super.proc_name);
146+
opal_jobid_t masterjobid = mastername->jobid;
143147

148+
sprintf(sm_filename,"/tmp/OMPIO_%s_%d_%s",filename_basename, masterjobid, ".sm");
144149
/* open shared memory file, initialize to 0, map into memory */
145150
sm_fd = open(sm_filename, O_RDWR | O_CREAT,
146151
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

0 commit comments

Comments
 (0)