Skip to content

coll/ucc: add support for dt float128 float32(64,128)_complex #10500

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions config/ompi_check_ucc.m4
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ AC_DEFUN([OMPI_CHECK_UCC],[
LIBS="${$1_LIBS} ${LIBS}"
AC_CHECK_FUNCS(ucc_comm_free, [], [])

dnl Probe the UCC header for the newer floating-point datatypes.
dnl The test program only references UCC_DT_FLOAT32_COMPLEX. The float128
dnl and the 64/128-bit complex enumerators are not referenced, so they are
dnl presumably expected to exist whenever this one does -- TODO confirm
dnl against the UCC headers.
AC_MSG_CHECKING([if UCC supports float128 and float32(64,128)_complex datatypes])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <ucc/api/ucc.h>]],
[[ucc_datatype_t dt = UCC_DT_FLOAT32_COMPLEX;]])],
[flag=1
AC_MSG_RESULT([yes])],
[flag=0
AC_MSG_RESULT([no])])
dnl The macro is always defined, to 1 when the probe compiled and to 0
dnl otherwise, so it can be tested with a plain #if.
AC_DEFINE_UNQUOTED(UCC_HAVE_COMPLEX_AND_FLOAT128_DT, $flag, [Check if float128 and float32(64,128)_complex dt are available in ucc.])

CPPFLAGS=$CPPFLAGS_save
LDFLAGS=$LDFLAGS_save
LIBS=$LIBS_save])
Expand Down
79 changes: 49 additions & 30 deletions ompi/mca/coll/ucc/coll_ucc_dtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,39 +17,58 @@
/* Sentinel value: -1 converted to ucc_reduction_op_t.  Going by its name it
 * marks a reduction op that has no UCC counterpart. */
#define COLL_UCC_OP_UNSUPPORTED ((ucc_reduction_op_t)-1)

static ucc_datatype_t ompi_datatype_2_ucc_dt[OPAL_DATATYPE_MAX_PREDEFINED] = {
[OPAL_DATATYPE_LOOP] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_END_LOOP] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_LB] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_UB] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_INT1] = UCC_DT_INT8,
[OPAL_DATATYPE_INT2] = UCC_DT_INT16,
[OPAL_DATATYPE_INT4] = UCC_DT_INT32,
[OPAL_DATATYPE_INT8] = UCC_DT_INT64,
[OPAL_DATATYPE_INT16] = UCC_DT_INT128,
[OPAL_DATATYPE_UINT1] = UCC_DT_UINT8,
[OPAL_DATATYPE_UINT2] = UCC_DT_UINT16,
[OPAL_DATATYPE_UINT4] = UCC_DT_UINT32,
[OPAL_DATATYPE_UINT8] = UCC_DT_UINT64,
[OPAL_DATATYPE_UINT16] = UCC_DT_UINT128,
[OPAL_DATATYPE_FLOAT2] = UCC_DT_FLOAT16,
[OPAL_DATATYPE_FLOAT4] = UCC_DT_FLOAT32,
[OPAL_DATATYPE_FLOAT8] = UCC_DT_FLOAT64,
[OPAL_DATATYPE_FLOAT12] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_FLOAT16] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_BOOL] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_WCHAR] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_LOOP] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_END_LOOP] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_LB] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_UB] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_INT1] = UCC_DT_INT8,
[OPAL_DATATYPE_INT2] = UCC_DT_INT16,
[OPAL_DATATYPE_INT4] = UCC_DT_INT32,
[OPAL_DATATYPE_INT8] = UCC_DT_INT64,
[OPAL_DATATYPE_INT16] = UCC_DT_INT128,
[OPAL_DATATYPE_UINT1] = UCC_DT_UINT8,
[OPAL_DATATYPE_UINT2] = UCC_DT_UINT16,
[OPAL_DATATYPE_UINT4] = UCC_DT_UINT32,
[OPAL_DATATYPE_UINT8] = UCC_DT_UINT64,
[OPAL_DATATYPE_UINT16] = UCC_DT_UINT128,
[OPAL_DATATYPE_FLOAT2] = UCC_DT_FLOAT16,
[OPAL_DATATYPE_FLOAT4] = UCC_DT_FLOAT32,
[OPAL_DATATYPE_FLOAT8] = UCC_DT_FLOAT64,
[OPAL_DATATYPE_FLOAT12] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_BOOL] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_WCHAR] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
#if SIZEOF_LONG == 4
[OPAL_DATATYPE_LONG] = UCC_DT_INT32,
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT32,
[OPAL_DATATYPE_LONG] = UCC_DT_INT32,
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT32,
#elif SIZEOF_LONG == 8
[OPAL_DATATYPE_LONG] = UCC_DT_INT64,
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT64,
[OPAL_DATATYPE_LONG] = UCC_DT_INT64,
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT64,
#endif
[OPAL_DATATYPE_UNAVAILABLE] = COLL_UCC_DT_UNSUPPORTED
#if UCC_HAVE_COMPLEX_AND_FLOAT128_DT
[OPAL_DATATYPE_FLOAT16] = UCC_DT_FLOAT128,
#if SIZEOF_FLOAT__COMPLEX == 8
[OPAL_DATATYPE_FLOAT_COMPLEX] = UCC_DT_FLOAT32_COMPLEX,
#else
[OPAL_DATATYPE_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_DOUBLE__COMPLEX == 16
[OPAL_DATATYPE_DOUBLE_COMPLEX] = UCC_DT_FLOAT64_COMPLEX,
#else
[OPAL_DATATYPE_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_LONG_DOUBLE__COMPLEX == 32
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = UCC_DT_FLOAT128_COMPLEX,
#else
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
#endif
#else
[OPAL_DATATYPE_FLOAT16] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
#endif
[OPAL_DATATYPE_UNAVAILABLE] = COLL_UCC_DT_UNSUPPORTED
};

static inline ucc_datatype_t ompi_dtype_to_ucc_dtype(ompi_datatype_t *dtype)
Expand Down
81 changes: 62 additions & 19 deletions oshmem/mca/scoll/ucc/scoll_ucc_dtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,68 @@
/* Sentinel value; going by its name it marks a reduction op that has no UCC
 * counterpart.  Parenthesized so the expansion stays a single operand in any
 * surrounding expression. */
#define SCOLL_UCC_OP_UNSUPPORTED (-1)

/*
 * Lookup table from OSHMEM op type ids (the array index) to the matching UCC
 * datatype.  Entries without a UCC counterpart hold SCOLL_UCC_DT_UNSUPPORTED.
 *
 * Designated initializers are used so that each entry is tied to its
 * OSHMEM_OP_TYPE_* id rather than to its position in the list.  C types
 * whose width is platform dependent are mapped only when the configured
 * SIZEOF_* value matches the width of the UCC type.  The float128 and
 * complex mappings additionally require UCC_HAVE_COMPLEX_AND_FLOAT128_DT
 * (set by the configure probe).
 */
static ucc_datatype_t shmem_datatype_to_ucc_dt[OSHMEM_OP_TYPE_NUMBER + 1] = {
#if SIZEOF_SHORT == 2
    [OSHMEM_OP_TYPE_SHORT]    = UCC_DT_INT16,
#else
    [OSHMEM_OP_TYPE_SHORT]    = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_INT == 4
    [OSHMEM_OP_TYPE_INT]      = UCC_DT_INT32,
#else
    [OSHMEM_OP_TYPE_INT]      = SCOLL_UCC_DT_UNSUPPORTED,
#endif
    /* A 4-byte long is mapped to the 32-bit integer type, the same way the
     * OPAL datatype table in coll/ucc handles it. */
#if SIZEOF_LONG == 8
    [OSHMEM_OP_TYPE_LONG]     = UCC_DT_INT64,
#elif SIZEOF_LONG == 4
    [OSHMEM_OP_TYPE_LONG]     = UCC_DT_INT32,
#else
    [OSHMEM_OP_TYPE_LONG]     = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_LONG_LONG == 8
    [OSHMEM_OP_TYPE_LLONG]    = UCC_DT_INT64,
#else
    [OSHMEM_OP_TYPE_LLONG]    = SCOLL_UCC_DT_UNSUPPORTED,
#endif
    [OSHMEM_OP_TYPE_INT16_T]  = UCC_DT_INT16,
    [OSHMEM_OP_TYPE_INT32_T]  = UCC_DT_INT32,
    [OSHMEM_OP_TYPE_INT64_T]  = UCC_DT_INT64,
#if SIZEOF_FLOAT == 4
    [OSHMEM_OP_TYPE_FLOAT]    = UCC_DT_FLOAT32,
#else
    [OSHMEM_OP_TYPE_FLOAT]    = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_DOUBLE == 8
    [OSHMEM_OP_TYPE_DOUBLE]   = UCC_DT_FLOAT64,
#else
    [OSHMEM_OP_TYPE_DOUBLE]   = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#if UCC_HAVE_COMPLEX_AND_FLOAT128_DT
    [OSHMEM_OP_TYPE_FREAL16]  = UCC_DT_FLOAT128,
#if SIZEOF_LONG_DOUBLE == 16
    [OSHMEM_OP_TYPE_LDOUBLE]  = UCC_DT_FLOAT128,
#else
    [OSHMEM_OP_TYPE_LDOUBLE]  = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_FLOAT__COMPLEX == 8
    [OSHMEM_OP_TYPE_FCOMPLEX] = UCC_DT_FLOAT32_COMPLEX,
#else
    [OSHMEM_OP_TYPE_FCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#if SIZEOF_DOUBLE__COMPLEX == 16
    [OSHMEM_OP_TYPE_DCOMPLEX] = UCC_DT_FLOAT64_COMPLEX,
#else
    [OSHMEM_OP_TYPE_DCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
#endif
#else
    /* UCC build without the float128/complex datatypes. */
    [OSHMEM_OP_TYPE_FREAL16]  = SCOLL_UCC_DT_UNSUPPORTED,
    [OSHMEM_OP_TYPE_LDOUBLE]  = SCOLL_UCC_DT_UNSUPPORTED,
    [OSHMEM_OP_TYPE_FCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
    [OSHMEM_OP_TYPE_DCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
#endif
    [OSHMEM_OP_TYPE_FINT2]    = UCC_DT_INT16,
    [OSHMEM_OP_TYPE_FINT4]    = UCC_DT_INT32,
    [OSHMEM_OP_TYPE_FINT8]    = UCC_DT_INT64,
    [OSHMEM_OP_TYPE_FREAL4]   = UCC_DT_FLOAT32,
    [OSHMEM_OP_TYPE_FREAL8]   = UCC_DT_FLOAT64,
    /* Slot one past the last real op type. */
    [OSHMEM_OP_TYPE_NUMBER]   = SCOLL_UCC_DT_UNSUPPORTED
};

static inline ucc_datatype_t shmem_op_to_ucc_dtype(oshmem_op_t *op)
Expand Down