diff --git a/config/ompi_check_ucc.m4 b/config/ompi_check_ucc.m4
index 30f1319e048..4fb05f658d2 100644
--- a/config/ompi_check_ucc.m4
+++ b/config/ompi_check_ucc.m4
@@ -41,6 +41,15 @@ AC_DEFUN([OMPI_CHECK_UCC],[
               LIBS="${$1_LIBS} ${LIBS}"
               AC_CHECK_FUNCS(ucc_comm_free, [], [])
 
+              AC_MSG_CHECKING([if UCC supports float128 and float32(64,128)_complex datatypes])
+              AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <ucc/api/ucc.h>]],
+                                [[ucc_datatype_t dt = UCC_DT_FLOAT32_COMPLEX;]])],
+                                [flag=1
+                                 AC_MSG_RESULT([yes])],
+                                [flag=0
+                                 AC_MSG_RESULT([no])])
+              AC_DEFINE_UNQUOTED(UCC_HAVE_COMPLEX_AND_FLOAT128_DT, $flag, [Check if float128 and float32(64,128)_complex dt are available in ucc.])
+
               CPPFLAGS=$CPPFLAGS_save
               LDFLAGS=$LDFLAGS_save
               LIBS=$LIBS_save])
diff --git a/ompi/mca/coll/ucc/coll_ucc_dtypes.h b/ompi/mca/coll/ucc/coll_ucc_dtypes.h
index 94da0ede803..ad16a00ad26 100644
--- a/ompi/mca/coll/ucc/coll_ucc_dtypes.h
+++ b/ompi/mca/coll/ucc/coll_ucc_dtypes.h
@@ -17,39 +17,58 @@
 #define COLL_UCC_OP_UNSUPPORTED ((ucc_reduction_op_t)-1)
 
 static ucc_datatype_t ompi_datatype_2_ucc_dt[OPAL_DATATYPE_MAX_PREDEFINED] = {
-    [OPAL_DATATYPE_LOOP]                = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_END_LOOP]            = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_LB]                  = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_UB]                  = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_INT1]                = UCC_DT_INT8,
-    [OPAL_DATATYPE_INT2]                = UCC_DT_INT16,
-    [OPAL_DATATYPE_INT4]                = UCC_DT_INT32,
-    [OPAL_DATATYPE_INT8]                = UCC_DT_INT64,
-    [OPAL_DATATYPE_INT16]               = UCC_DT_INT128,
-    [OPAL_DATATYPE_UINT1]               = UCC_DT_UINT8,
-    [OPAL_DATATYPE_UINT2]               = UCC_DT_UINT16,
-    [OPAL_DATATYPE_UINT4]               = UCC_DT_UINT32,
-    [OPAL_DATATYPE_UINT8]               = UCC_DT_UINT64,
-    [OPAL_DATATYPE_UINT16]              = UCC_DT_UINT128,
-    [OPAL_DATATYPE_FLOAT2]              = UCC_DT_FLOAT16,
-    [OPAL_DATATYPE_FLOAT4]              = UCC_DT_FLOAT32,
-    [OPAL_DATATYPE_FLOAT8]              = UCC_DT_FLOAT64,
-    [OPAL_DATATYPE_FLOAT12]             = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_FLOAT16]             = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_FLOAT_COMPLEX]       = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_DOUBLE_COMPLEX]      = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_BOOL]                = COLL_UCC_DT_UNSUPPORTED,
-    [OPAL_DATATYPE_WCHAR]               = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_LOOP]                    = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_END_LOOP]                = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_LB]                      = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_UB]                      = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_INT1]                    = UCC_DT_INT8,
+    [OPAL_DATATYPE_INT2]                    = UCC_DT_INT16,
+    [OPAL_DATATYPE_INT4]                    = UCC_DT_INT32,
+    [OPAL_DATATYPE_INT8]                    = UCC_DT_INT64,
+    [OPAL_DATATYPE_INT16]                   = UCC_DT_INT128,
+    [OPAL_DATATYPE_UINT1]                   = UCC_DT_UINT8,
+    [OPAL_DATATYPE_UINT2]                   = UCC_DT_UINT16,
+    [OPAL_DATATYPE_UINT4]                   = UCC_DT_UINT32,
+    [OPAL_DATATYPE_UINT8]                   = UCC_DT_UINT64,
+    [OPAL_DATATYPE_UINT16]                  = UCC_DT_UINT128,
+    [OPAL_DATATYPE_FLOAT2]                  = UCC_DT_FLOAT16,
+    [OPAL_DATATYPE_FLOAT4]                  = UCC_DT_FLOAT32,
+    [OPAL_DATATYPE_FLOAT8]                  = UCC_DT_FLOAT64,
+    [OPAL_DATATYPE_FLOAT12]                 = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_BOOL]                    = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_WCHAR]                   = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_SHORT_FLOAT_COMPLEX]     = COLL_UCC_DT_UNSUPPORTED,
 #if SIZEOF_LONG == 4
-    [OPAL_DATATYPE_LONG]                = UCC_DT_INT32,
-    [OPAL_DATATYPE_UNSIGNED_LONG]       = UCC_DT_UINT32,
+    [OPAL_DATATYPE_LONG]                    = UCC_DT_INT32,
+    [OPAL_DATATYPE_UNSIGNED_LONG]           = UCC_DT_UINT32,
 #elif SIZEOF_LONG == 8
-    [OPAL_DATATYPE_LONG]                = UCC_DT_INT64,
-    [OPAL_DATATYPE_UNSIGNED_LONG]       = UCC_DT_UINT64,
+    [OPAL_DATATYPE_LONG]                    = UCC_DT_INT64,
+    [OPAL_DATATYPE_UNSIGNED_LONG]           = UCC_DT_UINT64,
 #endif
-    [OPAL_DATATYPE_UNAVAILABLE]         = COLL_UCC_DT_UNSUPPORTED
+#if UCC_HAVE_COMPLEX_AND_FLOAT128_DT
+    [OPAL_DATATYPE_FLOAT16]                 = UCC_DT_FLOAT128,
+    #if SIZEOF_FLOAT__COMPLEX == 8
+        [OPAL_DATATYPE_FLOAT_COMPLEX]       = UCC_DT_FLOAT32_COMPLEX,
+    #else
+        [OPAL_DATATYPE_FLOAT_COMPLEX]       = COLL_UCC_DT_UNSUPPORTED,
+    #endif
+    #if SIZEOF_DOUBLE__COMPLEX == 16
+        [OPAL_DATATYPE_DOUBLE_COMPLEX]      = UCC_DT_FLOAT64_COMPLEX,
+    #else
+        [OPAL_DATATYPE_DOUBLE_COMPLEX]      = COLL_UCC_DT_UNSUPPORTED,
+    #endif
+    #if SIZEOF_LONG_DOUBLE__COMPLEX == 32
+        [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = UCC_DT_FLOAT128_COMPLEX,
+    #else
+        [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
+    #endif
+#else
+    [OPAL_DATATYPE_FLOAT16]                 = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_FLOAT_COMPLEX]           = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_DOUBLE_COMPLEX]          = COLL_UCC_DT_UNSUPPORTED,
+    [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX]     = COLL_UCC_DT_UNSUPPORTED,
+#endif
+    [OPAL_DATATYPE_UNAVAILABLE]             = COLL_UCC_DT_UNSUPPORTED
 };
 
 static inline ucc_datatype_t ompi_dtype_to_ucc_dtype(ompi_datatype_t *dtype)
diff --git a/oshmem/mca/scoll/ucc/scoll_ucc_dtypes.h b/oshmem/mca/scoll/ucc/scoll_ucc_dtypes.h
index 5f2c5d75c4a..fe7fe8f3f5b 100644
--- a/oshmem/mca/scoll/ucc/scoll_ucc_dtypes.h
+++ b/oshmem/mca/scoll/ucc/scoll_ucc_dtypes.h
@@ -19,25 +19,68 @@
 #define SCOLL_UCC_OP_UNSUPPORTED -1
 
 static ucc_datatype_t shmem_datatype_to_ucc_dt[OSHMEM_OP_TYPE_NUMBER + 1] = {
-    UCC_DT_INT16,             /* OSHMEM_OP_TYPE_SHORT    0 */
-    UCC_DT_INT32,             /* OSHMEM_OP_TYPE_INT      1 */
-    UCC_DT_INT64,             /* OSHMEM_OP_TYPE_LONG     2 */
-    UCC_DT_INT64,             /* OSHMEM_OP_TYPE_LLONG    3 */
-    UCC_DT_INT16,             /* OSHMEM_OP_TYPE_INT16_T  4 */
-    UCC_DT_INT32,             /* OSHMEM_OP_TYPE_INT32_T  5 */
-    UCC_DT_INT64,             /* OSHMEM_OP_TYPE_INT64_T  6 */
-    UCC_DT_FLOAT32,           /* OSHMEM_OP_TYPE_FLOAT    7 */
-    UCC_DT_FLOAT64,           /* OSHMEM_OP_TYPE_DOUBLE   8 */
-    SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_LDOUBLE  9 */
-    SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_FCOMPLEX 10 */
-    SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_DCOMPLEX 11 */
-    UCC_DT_INT16,             /* OSHMEM_OP_TYPE_FINT2    12 */
-    UCC_DT_INT32,             /* OSHMEM_OP_TYPE_FINT4    13 */
-    UCC_DT_INT64,             /* OSHMEM_OP_TYPE_FINT8    14 */
-    UCC_DT_FLOAT32,           /* OSHMEM_OP_TYPE_FREAL4   15 */
-    UCC_DT_FLOAT64,           /* OSHMEM_OP_TYPE_FREAL8   16 */
-    SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_FREAL16  17 */
-    SCOLL_UCC_DT_UNSUPPORTED  /* OSHMEM_OP_TYPE_NUMBER   18 */
+#if SIZEOF_SHORT == 2
+    [OSHMEM_OP_TYPE_SHORT] = UCC_DT_INT16,
+#else
+    [OSHMEM_OP_TYPE_SHORT] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+#if SIZEOF_INT == 4
+    [OSHMEM_OP_TYPE_INT] = UCC_DT_INT32,
+#else
+    [OSHMEM_OP_TYPE_INT] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+#if SIZEOF_LONG == 8
+    [OSHMEM_OP_TYPE_LONG] = UCC_DT_INT64,
+#else
+    [OSHMEM_OP_TYPE_LONG] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+#if SIZEOF_LONG_LONG == 8
+    [OSHMEM_OP_TYPE_LLONG] = UCC_DT_INT64,
+#else
+    [OSHMEM_OP_TYPE_LLONG] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+    [OSHMEM_OP_TYPE_INT16_T] = UCC_DT_INT16,
+    [OSHMEM_OP_TYPE_INT32_T] = UCC_DT_INT32,
+    [OSHMEM_OP_TYPE_INT64_T] = UCC_DT_INT64,
+#if SIZEOF_FLOAT == 4
+    [OSHMEM_OP_TYPE_FLOAT] = UCC_DT_FLOAT32,
+#else
+    [OSHMEM_OP_TYPE_FLOAT] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+#if SIZEOF_DOUBLE == 8
+    [OSHMEM_OP_TYPE_DOUBLE] = UCC_DT_FLOAT64,
+#else
+    [OSHMEM_OP_TYPE_DOUBLE] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+#if UCC_HAVE_COMPLEX_AND_FLOAT128_DT
+    [OSHMEM_OP_TYPE_FREAL16] = UCC_DT_FLOAT128,
+    #if SIZEOF_LONG_DOUBLE == 16
+        [OSHMEM_OP_TYPE_LDOUBLE] = UCC_DT_FLOAT128,
+    #else
+        [OSHMEM_OP_TYPE_LDOUBLE] = SCOLL_UCC_DT_UNSUPPORTED,
+    #endif
+    #if SIZEOF_FLOAT__COMPLEX == 8
+        [OSHMEM_OP_TYPE_FCOMPLEX] = UCC_DT_FLOAT32_COMPLEX,
+    #else
+        [OSHMEM_OP_TYPE_FCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
+    #endif
+    #if SIZEOF_DOUBLE__COMPLEX == 16
+        [OSHMEM_OP_TYPE_DCOMPLEX] = UCC_DT_FLOAT64_COMPLEX,
+    #else
+        [OSHMEM_OP_TYPE_DCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
+    #endif
+#else
+    [OSHMEM_OP_TYPE_FREAL16] = SCOLL_UCC_DT_UNSUPPORTED,
+    [OSHMEM_OP_TYPE_LDOUBLE] = SCOLL_UCC_DT_UNSUPPORTED,
+    [OSHMEM_OP_TYPE_FCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
+    [OSHMEM_OP_TYPE_DCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
+#endif
+    [OSHMEM_OP_TYPE_FINT2] = UCC_DT_INT16,
+    [OSHMEM_OP_TYPE_FINT4] = UCC_DT_INT32,
+    [OSHMEM_OP_TYPE_FINT8] = UCC_DT_INT64,
+    [OSHMEM_OP_TYPE_FREAL4] = UCC_DT_FLOAT32,
+    [OSHMEM_OP_TYPE_FREAL8] = UCC_DT_FLOAT64,
+    [OSHMEM_OP_TYPE_NUMBER] = SCOLL_UCC_DT_UNSUPPORTED
 };
 
 static inline ucc_datatype_t shmem_op_to_ucc_dtype(oshmem_op_t *op)