Skip to content

Commit 60ce984

Browse files
authored
Merge pull request #5609 from edgargabriel/pr/sharedfp-naming-conflict-v3.1
sharedfp/sm and lockedfile: fix naming bug (add the PID of rank 0 to the generated lock and shared-memory file names to avoid name conflicts)
2 parents 67e5af4 + 154c9ce commit 60ce984

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <sys/stat.h>
3636
#endif
3737
#include <fcntl.h>
38+
#include <unistd.h>
3839

3940
int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
4041
const char* filename,
@@ -50,6 +51,9 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
5051
mca_io_ompio_file_t * shfileHandle, *ompio_fh;
5152
mca_io_ompio_data_t *data;
5253

54+
pid_t my_pid;
55+
int int_pid;
56+
5357
/*------------------------------------------------------------*/
5458
/*Open the same file again without shared file pointer support*/
5559
/*------------------------------------------------------------*/
@@ -110,15 +114,27 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
110114
comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm,
111115
comm->c_coll->coll_bcast_module );
112116

113-
size_t filenamelen = strlen(filename) + 16;
117+
if ( 0 == fh->f_rank ) {
118+
my_pid = getpid();
119+
int_pid = (int) my_pid;
120+
}
121+
err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module );
122+
if ( OMPI_SUCCESS != err ) {
123+
opal_output(0, "[%d]mca_sharedfp_lockedfile_file_open: Error in bcast operation\n", fh->f_rank);
124+
free (sh);
125+
free(module_data);
126+
return err;
127+
}
128+
129+
size_t filenamelen = strlen(filename) + 24;
114130
lockedfilename = (char*)malloc(sizeof(char) * filenamelen);
115131
if ( NULL == lockedfilename ) {
116132
free (shfileHandle);
117133
free (sh);
118134
free (module_data);
119135
return OMPI_ERR_OUT_OF_RESOURCE;
120136
}
121-
snprintf(lockedfilename, filenamelen, "%s-%u%s",filename,masterjobid,".lock");
137+
snprintf(lockedfilename, filenamelen, "%s-%u-%d%s",filename,masterjobid,int_pid,".lock");
122138
module_data->filename = lockedfilename;
123139

124140
/*-------------------------------------------------*/

ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
#include <semaphore.h>
4545
#include <sys/mman.h>
4646
#include <libgen.h>
47-
47+
#include <unistd.h>
4848

4949
int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
5050
const char* filename,
@@ -65,7 +65,9 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
6565
int sm_fd;
6666
int rank;
6767
uint32_t comm_cid;
68-
68+
int int_pid;
69+
pid_t my_pid;
70+
6971
/*----------------------------------------------------*/
7072
/*Open the same file again without shared file pointer*/
7173
/*----------------------------------------------------*/
@@ -134,7 +136,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
134136
** For sharedfp we also want to put the file backed shared memory into the tmp directory
135137
*/
136138
filename_basename = basename(filename);
137-
/* format is "%s/%s_cid-%d.sm", see below */
139+
/* format is "%s/%s_cid-%d-%d.sm", see below */
138140
sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4;
139141
sm_filename = (char*) malloc( sizeof(char) * sm_filename_length);
140142
if (NULL == sm_filename) {
@@ -146,7 +148,21 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
146148
}
147149

148150
comm_cid = ompi_comm_get_cid(comm);
149-
sprintf(sm_filename, "%s/%s_cid-%d.sm", ompi_process_info.job_session_dir, filename_basename, comm_cid);
151+
if ( 0 == fh->f_rank ) {
152+
my_pid = getpid();
153+
int_pid = (int) my_pid;
154+
}
155+
err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module );
156+
if ( OMPI_SUCCESS != err ) {
157+
opal_output(0,"mca_sharedfp_sm_file_open: Error in bcast operation \n");
158+
free(sm_filename);
159+
free(sm_data);
160+
free(sh);
161+
return err;
162+
}
163+
snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir,
164+
filename_basename, comm_cid, int_pid);
165+
150166
/* open shared memory file, initialize to 0, map into memory */
151167
sm_fd = open(sm_filename, O_RDWR | O_CREAT,
152168
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

0 commit comments

Comments
 (0)