Skip to content

Commit 8e281f5

Browse files
committed
Merge remote-tracking branch 'upstream/v4.1.x' into v4.1.x_hpcx
2 parents a2b0012 + 467fbb9 commit 8e281f5

18 files changed

+224
-19
lines changed

config/opal_check_cuda.m4

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
dnl -*- shell-script -*-
22
dnl
3+
dnl Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
34
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
45
dnl University Research and Technology
56
dnl Corporation. All rights reserved.
@@ -113,6 +114,12 @@ AS_IF([test "$opal_check_cuda_happy"="yes"],
113114
[#include <$opal_cuda_incdir/cuda.h>]),
114115
[])
115116

117+
# If we have CUDA support, check to see if we have support for cuMemCreate memory on host NUMA.
118+
AS_IF([test "$opal_check_cuda_happy"="yes"],
119+
[AC_CHECK_DECL([CU_MEM_LOCATION_TYPE_HOST_NUMA], [CUDA_VMM_SUPPORT=1], [CUDA_VMM_SUPPORT=0],
120+
[#include <$opal_cuda_incdir/cuda.h>])],
121+
[])
122+
116123
AC_MSG_CHECKING([if have cuda support])
117124
if test "$opal_check_cuda_happy" = "yes"; then
118125
AC_MSG_RESULT([yes (-I$opal_cuda_incdir)])
@@ -134,6 +141,10 @@ AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
134141
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
135142
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
136143

144+
AM_CONDITIONAL([OPAL_cuda_vmm_support], [test "x$CUDA_VMM_SUPPORT" = "x1"])
145+
AC_DEFINE_UNQUOTED([OPAL_CUDA_VMM_SUPPORT],$CUDA_VMM_SUPPORT,
146+
[Whether we have CU_MEM_LOCATION_TYPE_HOST_NUMA support available])
147+
137148
AM_CONDITIONAL([OPAL_cuda_get_attributes], [test "x$CUDA_GET_ATTRIBUTES" = "x1"])
138149
AC_DEFINE_UNQUOTED([OPAL_CUDA_GET_ATTRIBUTES],$CUDA_GET_ATTRIBUTES,
139150
[Whether we have CUDA cuPointerGetAttributes function available])

ompi/mca/coll/cuda/coll_cuda.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2014 The University of Tennessee and The University
34
* of Tennessee Research Foundation. All rights
45
* reserved.
@@ -45,6 +46,11 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
4546
struct ompi_communicator_t *comm,
4647
mca_coll_base_module_t *module);
4748

49+
int mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, int count,
50+
struct ompi_datatype_t *dtype,
51+
struct ompi_op_t *op,
52+
mca_coll_base_module_t *module);
53+
4854
int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
4955
struct ompi_datatype_t *dtype,
5056
struct ompi_op_t *op,

ompi/mca/coll/cuda/coll_cuda_module.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2014-2017 The University of Tennessee and The University
34
* of Tennessee Research Foundation. All rights
45
* reserved.
@@ -104,6 +105,7 @@ mca_coll_cuda_comm_query(struct ompi_communicator_t *comm,
104105
cuda_module->super.coll_gather = NULL;
105106
cuda_module->super.coll_gatherv = NULL;
106107
cuda_module->super.coll_reduce = mca_coll_cuda_reduce;
108+
cuda_module->super.coll_reduce_local = mca_coll_cuda_reduce_local;
107109
cuda_module->super.coll_reduce_scatter = NULL;
108110
cuda_module->super.coll_reduce_scatter_block = mca_coll_cuda_reduce_scatter_block;
109111
cuda_module->super.coll_scan = mca_coll_cuda_scan;

ompi/mca/coll/cuda/coll_cuda_reduce.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2004-2015 The University of Tennessee and The University
34
* of Tennessee Research Foundation. All rights
45
* reserved.
@@ -78,3 +79,50 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
7879
}
7980
return rc;
8081
}
82+
83+
int
84+
mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, int count,
85+
struct ompi_datatype_t *dtype,
86+
struct ompi_op_t *op,
87+
mca_coll_base_module_t *module)
88+
{
89+
ptrdiff_t gap;
90+
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
91+
size_t bufsize;
92+
int rc;
93+
94+
bufsize = opal_datatype_span(&dtype->super, count, &gap);
95+
96+
if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
97+
sbuf1 = (char*)malloc(bufsize);
98+
if (NULL == sbuf1) {
99+
return OMPI_ERR_OUT_OF_RESOURCE;
100+
}
101+
opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
102+
sbuf = sbuf1 - gap;
103+
}
104+
105+
if (opal_cuda_check_bufs((char *)rbuf, NULL)) {
106+
rbuf1 = (char*)malloc(bufsize);
107+
if (NULL == rbuf1) {
108+
if (NULL != sbuf1) free(sbuf1);
109+
return OMPI_ERR_OUT_OF_RESOURCE;
110+
}
111+
opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
112+
rbuf2 = rbuf; /* save away original buffer */
113+
rbuf = rbuf1 - gap;
114+
}
115+
116+
ompi_op_reduce(op, (void *)sbuf, rbuf, count, dtype);
117+
rc = OMPI_SUCCESS;
118+
119+
if (NULL != sbuf1) {
120+
free(sbuf1);
121+
}
122+
if (NULL != rbuf1) {
123+
rbuf = rbuf2;
124+
opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
125+
free(rbuf1);
126+
}
127+
return rc;
128+
}

ompi/mca/coll/ucc/coll_ucc_allgather.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
#include "coll_ucc_common.h"
1111

12-
static inline ucc_status_t mca_coll_ucc_allgather_init(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
13-
void* rbuf, int rcount, struct ompi_datatype_t *rdtype,
12+
static inline ucc_status_t mca_coll_ucc_allgather_init(const void *sbuf, size_t scount, struct ompi_datatype_t *sdtype,
13+
void* rbuf, size_t rcount, struct ompi_datatype_t *rdtype,
1414
mca_coll_ucc_module_t *ucc_module,
1515
ucc_coll_req_h *req,
1616
mca_coll_ucc_req_t *coll_req)

ompi/mca/coll/ucc/coll_ucc_allgatherv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
#include "coll_ucc_common.h"
1111

12-
static inline ucc_status_t mca_coll_ucc_allgatherv_init(const void *sbuf, int scount,
12+
static inline ucc_status_t mca_coll_ucc_allgatherv_init(const void *sbuf, size_t scount,
1313
struct ompi_datatype_t *sdtype,
1414
void* rbuf, const int *rcounts, const int *rdisps,
1515
struct ompi_datatype_t *rdtype,

ompi/mca/coll/ucc/coll_ucc_allreduce.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
#include "coll_ucc_common.h"
1111

12-
static inline ucc_status_t mca_coll_ucc_allreduce_init(const void *sbuf, void *rbuf, int count,
12+
static inline ucc_status_t mca_coll_ucc_allreduce_init(const void *sbuf, void *rbuf, size_t count,
1313
struct ompi_datatype_t *dtype,
1414
struct ompi_op_t *op, mca_coll_ucc_module_t *ucc_module,
1515
ucc_coll_req_h *req,

ompi/mca/coll/ucc/coll_ucc_alltoall.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
#include "coll_ucc_common.h"
1111

12-
static inline ucc_status_t mca_coll_ucc_alltoall_init(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
13-
void* rbuf, int rcount, struct ompi_datatype_t *rdtype,
12+
static inline ucc_status_t mca_coll_ucc_alltoall_init(const void *sbuf, size_t scount, struct ompi_datatype_t *sdtype,
13+
void* rbuf, size_t rcount, struct ompi_datatype_t *rdtype,
1414
mca_coll_ucc_module_t *ucc_module,
1515
ucc_coll_req_h *req,
1616
mca_coll_ucc_req_t *coll_req)

ompi/mca/coll/ucc/coll_ucc_bcast.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
#include "coll_ucc_common.h"
1010

11-
static inline ucc_status_t mca_coll_ucc_bcast_init(void *buf, int count, struct ompi_datatype_t *dtype,
11+
static inline ucc_status_t mca_coll_ucc_bcast_init(void *buf, size_t count, struct ompi_datatype_t *dtype,
1212
int root, mca_coll_ucc_module_t *ucc_module,
1313
ucc_coll_req_h *req,
1414
mca_coll_ucc_req_t *coll_req)

ompi/mca/coll/ucc/coll_ucc_reduce.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
#include "coll_ucc_common.h"
1010

11-
static inline ucc_status_t mca_coll_ucc_reduce_init(const void *sbuf, void *rbuf, int count,
11+
static inline ucc_status_t mca_coll_ucc_reduce_init(const void *sbuf, void *rbuf, size_t count,
1212
struct ompi_datatype_t *dtype,
1313
struct ompi_op_t *op, int root,
1414
mca_coll_ucc_module_t *ucc_module,

ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,8 +729,10 @@ exit :
729729
}
730730
free(broken_iov_arrays);
731731
free(fh->f_procs_in_group);
732+
free(fh->f_aggr_list);
732733
fh->f_procs_in_group=NULL;
733734
fh->f_procs_per_group=0;
735+
fh->f_aggr_list=NULL;
734736
free(result_counts);
735737
free(reqs);
736738

ompi/mca/fs/lustre/fs_lustre.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2015-2018 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
16+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -31,6 +32,12 @@
3132
extern int mca_fs_lustre_priority;
3233
extern int mca_fs_lustre_stripe_size;
3334
extern int mca_fs_lustre_stripe_width;
35+
extern int mca_fs_lustre_lock_algorithm;
36+
37+
#define FS_LUSTRE_LOCK_AUTO 0
38+
#define FS_LUSTRE_LOCK_NEVER 1
39+
#define FS_LUSTRE_LOCK_ENTIRE_FILE 2
40+
#define FS_LUSTRE_LOCK_RANGES 3
3441

3542
BEGIN_C_DECLS
3643

ompi/mca/fs/lustre/fs_lustre_component.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2008-2011 University of Houston. All rights reserved.
1414
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
16+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -45,6 +46,7 @@ int mca_fs_lustre_priority = 20;
4546
runtime also*/
4647
int mca_fs_lustre_stripe_size = 0;
4748
int mca_fs_lustre_stripe_width = 0;
49+
int mca_fs_lustre_lock_algorithm = 0; /* auto */
4850
/*
4951
* Instantiate the public struct with all of our public information
5052
* and pointers to our public functions in it
@@ -93,6 +95,15 @@ lustre_register(void)
9395
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
9496
OPAL_INFO_LVL_9,
9597
MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_lustre_stripe_width);
98+
mca_fs_lustre_lock_algorithm = 0;
99+
(void) mca_base_component_var_register(&mca_fs_lustre_component.fsm_version,
100+
"lock_algorithm", "Locking algorithm used by the fs ufs component. "
101+
" 0: auto (default), 1: skip locking, 2: always lock entire file, "
102+
"3: lock only specific ranges",
103+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
104+
OPAL_INFO_LVL_9,
105+
MCA_BASE_VAR_SCOPE_READONLY,
106+
&mca_fs_lustre_lock_algorithm );
96107

97108
return OMPI_SUCCESS;
98109
}

ompi/mca/fs/lustre/fs_lustre_file_open.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2015-2020 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
16+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -171,8 +172,22 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
171172
fh->f_stripe_size = lump->lmm_stripe_size;
172173
fh->f_stripe_count = lump->lmm_stripe_count;
173174
fh->f_fs_block_size = lump->lmm_stripe_size;
174-
fh->f_flags |= OMPIO_LOCK_NEVER;
175175
free(lump);
176176

177+
if (FS_LUSTRE_LOCK_AUTO == mca_fs_lustre_lock_algorithm ||
178+
FS_LUSTRE_LOCK_NEVER == mca_fs_lustre_lock_algorithm ) {
179+
fh->f_flags |= OMPIO_LOCK_NEVER;
180+
}
181+
else if (FS_LUSTRE_LOCK_ENTIRE_FILE == mca_fs_lustre_lock_algorithm) {
182+
fh->f_flags |= OMPIO_LOCK_ENTIRE_FILE;
183+
}
184+
else if (FS_LUSTRE_LOCK_RANGES == mca_fs_lustre_lock_algorithm) {
185+
/* Nothing to be done. This is what the posix fbtl component would do
186+
anyway without additional information . */
187+
}
188+
else {
189+
opal_output ( 1, "Invalid value for mca_fs_lustre_lock_algorithm %d", mca_fs_lustre_lock_algorithm );
190+
}
191+
177192
return OMPI_SUCCESS;
178193
}

ompi/mca/fs/ufs/fs_ufs_file_open.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2015-2018 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
16+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -105,12 +106,6 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm,
105106
component. */
106107
fh->f_flags |= OMPIO_LOCK_ENTIRE_FILE;
107108
}
108-
else {
109-
fh->f_flags |= OMPIO_LOCK_NEVER;
110-
}
111-
}
112-
else {
113-
fh->f_flags |= OMPIO_LOCK_NEVER;
114109
}
115110
free (fstype);
116111
}

0 commit comments

Comments
 (0)