open-mpi · jsquyres · Apr 5, 2021 · Mar 30, 2021 · Apr 2, 2021 · Apr 2, 2021
diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h
@@ -59,7 +59,7 @@ BEGIN_C_DECLS
 #define OMPI_DATATYPE_FLAG_DATA_FORTRAN  0xC000
 #define OMPI_DATATYPE_FLAG_DATA_LANGUAGE 0xC000
 
-#define OMPI_DATATYPE_MAX_PREDEFINED 50
+#define OMPI_DATATYPE_MAX_PREDEFINED 52
 
 #if OMPI_DATATYPE_MAX_PREDEFINED > OPAL_DATATYPE_MAX_SUPPORTED
 #error Need to increase the number of supported dataypes by OPAL (value OPAL_DATATYPE_MAX_SUPPORTED).

diff --git a/ompi/datatype/ompi_datatype_external.c b/ompi/datatype/ompi_datatype_external.c
@@ -126,7 +126,7 @@ int ompi_datatype_pack_external_size(const char datarep[], int incount,
                                               CONVERTOR_SEND_CONVERSION,
                                               &local_convertor );
 
-    opal_convertor_get_unpacked_size( &local_convertor, &length );
+    opal_convertor_get_packed_size( &local_convertor, &length );
     *size = (MPI_Aint)length;
     OBJ_DESTRUCT( &local_convertor );
 

diff --git a/ompi/datatype/ompi_datatype_external32.c b/ompi/datatype/ompi_datatype_external32.c
@@ -26,39 +26,74 @@
 
 /* From the MPI standard. external32 use the following types:
  *   Type Length
- * MPI_PACKED               1
- * MPI_BYTE                 1
- * MPI_CHAR                 1
- * MPI_UNSIGNED_CHAR        1
- * MPI_SIGNED_CHAR          1
- * MPI_WCHAR                2
- * MPI_SHORT                2
- * MPI_UNSIGNED_SHORT       2
- * MPI_INT                  4
- * MPI_UNSIGNED             4
- * MPI_LONG                 4
- * MPI_UNSIGNED_LONG        4
- * MPI_FLOAT                4
- * MPI_DOUBLE               8
- * MPI_LONG_DOUBLE         16
+ * MPI_PACKED                   1
+ * MPI_BYTE                     1
+ * MPI_CHAR                     1
+ * MPI_UNSIGNED_CHAR            1
+ * MPI_SIGNED_CHAR              1
+ * MPI_WCHAR                    2
+ * MPI_SHORT                    2
+ * MPI_UNSIGNED_SHORT           2
+ * MPI_INT                      4
+ * MPI_LONG                     4
+ * MPI_UNSIGNED                 4
+ * MPI_UNSIGNED_LONG            4
+ * MPI_LONG_LONG_INT            8
+ * MPI_UNSIGNED_LONG_LONG       8
+ * MPI_FLOAT                    4
+ * MPI_DOUBLE                   8
+ * MPI_LONG_DOUBLE             16
+ *
+ * MPI_C_BOOL                   1
+ * MPI_INT8_T                   1
+ * MPI_INT16_T                  2
+ * MPI_INT32_T                  4
+ * MPI_INT64_T                  8
+ * MPI_UINT8_T                  1
+ * MPI_UINT16_T                 2
+ * MPI_UINT32_T                 4
+ * MPI_UINT64_T                 8
+ * MPI_AINT                     8
+ * MPI_COUNT                    8
+ * MPI_OFFSET                   8
+ * MPI_C_COMPLEX                2*4
+ * MPI_C_FLOAT_COMPLEX          2*4
+ * MPI_C_DOUBLE_COMPLEX         2*8
+ * MPI_C_LONG_DOUBLE_COMPLEX    2*16
+ *
  * Fortran types
- * MPI_CHARACTER            1
- * MPI_LOGICAL              4
- * MPI_INTEGER              4
- * MPI_REAL                 4
- * MPI_DOUBLE_PRECISION     8
- * MPI_COMPLEX              2*4
- * MPI_DOUBLE_COMPLEX       2*8
+ * MPI_CHARACTER                1
+ * MPI_LOGICAL                  4
+ * MPI_INTEGER                  4
+ * MPI_REAL                     4
+ * MPI_DOUBLE_PRECISION         8
+ * MPI_COMPLEX                  2*4
+ * MPI_DOUBLE_COMPLEX           2*8
+ *
+ * MPI_CXX_BOOL                 1
+ * MPI_CXX_FLOAT_COMPLEX        2*4
+ * MPI_CXX_DOUBLE_COMPLEX       2*8
+ * MPI_CXX_LONG_DOUBLE_COMPLEX  2*16
+ *
  * Optional types
- * MPI_INTEGER1             1
- * MPI_INTEGER2             2
- * MPI_INTEGER4             4
- * MPI_INTEGER8             8
- * MPI_LONG_LONG_INT        8
- * MPI_UNSIGNED_LONG_LONG   8
- * MPI_REAL4                4
- * MPI_REAL8                8
- * MPI_REAL16              16
+ * MPI_INTEGER1                 1
+ * MPI_INTEGER2                 2
+ * MPI_INTEGER4                 4
+ * MPI_INTEGER8                 8
+ * MPI_INTEGER16               16
+ * MPI_REAL2                    2
+ * MPI_REAL4                    4
+ * MPI_REAL8                    8
+ * MPI_REAL16                  16
+ * MPI_COMPLEX4                 2*2
+ * MPI_COMPLEX8                 2*4
+ * MPI_COMPLEX16                2*8
+ * MPI_COMPLEX32                2*16
+ *
+ * MPI_CXX_BOOL                 1
+ * MPI_CXX_FLOAT_COMPLEX        2*4
+ * MPI_CXX_DOUBLE_COMPLEX       2*8
+ * MPI_CXX_LONG_DOUBLE_COMPLEX  2*16
  *
  * All floating point values are in big-endian IEEE format. Double extended use 16 bytes, with
  * 15 exponent bits (bias = 10383), 112 mantissa bits and the same encoding as double. All

diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h
@@ -109,8 +109,14 @@
 #define OMPI_DATATYPE_MPI_SHORT_FLOAT             0x30
 #define OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX   0x31
 
+/*
+ * Datatypes that have a different external32 length.
+ */
+#define OMPI_DATATYPE_MPI_LONG                    0x32
+#define OMPI_DATATYPE_MPI_UNSIGNED_LONG           0x33
+
 /* This should __ALWAYS__ stay last  */
-#define OMPI_DATATYPE_MPI_UNAVAILABLE             0x32
+#define OMPI_DATATYPE_MPI_UNAVAILABLE             0x34
 
 
 #define OMPI_DATATYPE_MPI_MAX_PREDEFINED          (OMPI_DATATYPE_MPI_UNAVAILABLE+1)
@@ -177,20 +183,6 @@
 #define OMPI_DATATYPE_MPI_UNSIGNED                OMPI_DATATYPE_MPI_UINT64_T
 #endif
 
-#if SIZEOF_LONG == 1
-#define OMPI_DATATYPE_MPI_LONG                    OMPI_DATATYPE_MPI_INT8_T
-#define OMPI_DATATYPE_MPI_UNSIGNED_LONG           OMPI_DATATYPE_MPI_UINT8_T
-#elif SIZEOF_LONG == 2
-#define OMPI_DATATYPE_MPI_LONG                    OMPI_DATATYPE_MPI_INT16_T
-#define OMPI_DATATYPE_MPI_UNSIGNED_LONG           OMPI_DATATYPE_MPI_UINT16_T
-#elif SIZEOF_LONG == 4
-#define OMPI_DATATYPE_MPI_LONG                    OMPI_DATATYPE_MPI_INT32_T
-#define OMPI_DATATYPE_MPI_UNSIGNED_LONG           OMPI_DATATYPE_MPI_UINT32_T
-#elif SIZEOF_LONG == 8
-#define OMPI_DATATYPE_MPI_LONG                    OMPI_DATATYPE_MPI_INT64_T
-#define OMPI_DATATYPE_MPI_UNSIGNED_LONG           OMPI_DATATYPE_MPI_UINT64_T
-#endif
-
 #if SIZEOF_LONG_LONG == 1
 #define OMPI_DATATYPE_MPI_LONG_LONG_INT           OMPI_DATATYPE_MPI_INT8_T
 #define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG      OMPI_DATATYPE_MPI_UINT8_T
@@ -571,16 +563,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
 #define OMPI_DATATYPE_INITIALIZER_UNSIGNED            OPAL_DATATYPE_INITIALIZER_UINT8
 #endif
 
-#if SIZEOF_LONG == 4
-#define OMPI_DATATYPE_INITIALIZER_LONG                OPAL_DATATYPE_INITIALIZER_INT4
-#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG       OPAL_DATATYPE_INITIALIZER_UINT4
-#elif SIZEOF_LONG == 8
-#define OMPI_DATATYPE_INITIALIZER_LONG                OPAL_DATATYPE_INITIALIZER_INT8
-#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG       OPAL_DATATYPE_INITIALIZER_UINT8
-#elif SIZEOF_LONG == 16
-#define OMPI_DATATYPE_INITIALIZER_LONG                OPAL_DATATYPE_INITIALIZER_INT16
-#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG       OPAL_DATATYPE_INITIALIZER_UINT16
-#endif
+#define OMPI_DATATYPE_INITIALIZER_LONG                OPAL_DATATYPE_INITIALIZER_LONG
+#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG       OPAL_DATATYPE_INITIALIZER_UNSIGNED_LONG
 
 #if SIZEOF_LONG_LONG == 4
 #define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT       OPAL_DATATYPE_INITIALIZER_INT4

diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c
@@ -366,6 +366,8 @@ const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEF
     [OMPI_DATATYPE_MPI_LB] = &ompi_mpi_lb.dt,
     [OMPI_DATATYPE_MPI_UB] = &ompi_mpi_ub.dt,
 
+    [OMPI_DATATYPE_MPI_LONG] = &ompi_mpi_long.dt,
+    [OMPI_DATATYPE_MPI_UNSIGNED_LONG] = &ompi_mpi_unsigned_long.dt,
     /* MPI 3.0 types */
     [OMPI_DATATYPE_MPI_COUNT] = &ompi_mpi_count.dt,
 

diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
@@ -234,8 +234,8 @@ static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req)
                 req->req_recv.req_base.req_addr,
                 0,
                 &req->req_recv.req_base.req_convertor);
-        opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
-                                         &req->req_bytes_expected);
+        opal_convertor_get_packed_size(&req->req_recv.req_base.req_convertor,
+                                       &req->req_bytes_expected);
     }
 }
 

diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c
@@ -142,7 +142,13 @@ opal_convertor_master_t *opal_convertor_find_or_create_master(uint32_t remote_ar
     } else {
         opal_output(0, "Unknown sizeof(bool) for the remote architecture\n");
     }
-
+    if (opal_arch_checkmask(&master->remote_arch, OPAL_ARCH_LONGIS64)) {
+        remote_sizes[OPAL_DATATYPE_LONG] = 8;
+        remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG] = 8;
+    } else {
+        remote_sizes[OPAL_DATATYPE_LONG] = 4;
+        remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG] = 4;
+    }
     /**
      * Now we can compute the conversion mask. For all sizes where the remote
      * and local architecture differ a conversion is needed. Moreover, if the
@@ -434,7 +440,7 @@ int32_t opal_convertor_set_position_nocheck(opal_convertor_t *convertor, size_t
         }
         rc = opal_convertor_generic_simple_position(convertor, position);
         /**
-         * If we have a non-contigous send convertor don't allow it move in the middle
+         * If we have a non-contiguous send convertor don't allow it to move in the middle
          * of a predefined datatype, it won't be able to copy out the left-overs
          * anyway. Instead force the position to stay on predefined datatypes
          * boundaries. As we allow partial predefined datatypes on the contiguous
@@ -484,8 +490,8 @@ size_t opal_convertor_compute_remote_size(opal_convertor_t *pConvertor)
     pConvertor->remote_size = pConvertor->local_size;
     if (OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask)) {
         pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
-        if (!(pConvertor->flags & CONVERTOR_SEND
-              && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
+        /* Can we use the optimized description? */
+        if (pConvertor->flags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) {
             pConvertor->use_desc = &(datatype->desc);
         }
         if (0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE)) {

diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h
@@ -199,22 +199,19 @@ static inline int32_t opal_convertor_need_buffers(const opal_convertor_t *pConve
 size_t opal_convertor_compute_remote_size(opal_convertor_t *pConv);
 
 /**
- * Return the local size of the convertor (count times the size of the datatype).
+ * Return the packed size of the memory layout represented by this
+ * convertor. This is the size of the buffer that would be needed
+ * for the conversion (takes into account the type of the operation,
+ * aka pack or unpack, as well as which side is supposed to do the
+ * type conversion).
  */
-static inline void opal_convertor_get_packed_size(const opal_convertor_t *pConv, size_t *pSize)
+static inline void
+opal_convertor_get_packed_size(const opal_convertor_t *pConv, size_t *pSize)
 {
     *pSize = pConv->local_size;
-}
-
-/**
- * Return the remote size of the convertor (count times the remote size of the
- * datatype). On homogeneous environments the local and remote sizes are
- * identical.
- */
-static inline void opal_convertor_get_unpacked_size(const opal_convertor_t *pConv, size_t *pSize)
-{
-    if (pConv->flags & CONVERTOR_HOMOGENEOUS) {
-        *pSize = pConv->local_size;
+    if ((pConv->flags & CONVERTOR_HOMOGENEOUS) ||
+        ((pConv->flags & CONVERTOR_SEND) && !(pConv->flags & CONVERTOR_SEND_CONVERSION)) ||
+        ((pConv->flags & CONVERTOR_RECV) && (pConv->flags & CONVERTOR_SEND_CONVERSION))) {
         return;
     }
     if (0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags)) {

diff --git a/opal/datatype/opal_copy_functions.c b/opal/datatype/opal_copy_functions.c
@@ -62,10 +62,10 @@
                                                                                                 \
         if ((from_extent == (ptrdiff_t) local_TYPE_size)                                        \
             && (to_extent == (ptrdiff_t) remote_TYPE_size)) {                                   \
-            /* copy of contigous data at both source and destination */                         \
+            /* copy of contiguous data at both source and destination */                        \
             MEMCPY(to, from, count *local_TYPE_size);                                           \
         } else {                                                                                \
-            /* source or destination are non-contigous */                                       \
+            /* source or destination are non-contiguous */                                      \
             for (size_t i = 0; i < count; i++) {                                                \
                 MEMCPY(to, from, local_TYPE_size);                                              \
                 to += to_extent;                                                                \
@@ -254,30 +254,37 @@ COPY_TYPE(wchar, wchar_t, 1)
 /* Table of predefined copy functions - one for each OPAL type */
 /* NOTE: The order of this array *MUST* match the order in opal_datatype_basicDatatypes */
 conversion_fct_t opal_datatype_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
-    (conversion_fct_t) NULL,                     /* OPAL_DATATYPE_LOOP         */
-    (conversion_fct_t) NULL,                     /* OPAL_DATATYPE_END_LOOP     */
-    (conversion_fct_t) NULL,                     /* OPAL_DATATYPE_LB           */
-    (conversion_fct_t) NULL,                     /* OPAL_DATATYPE_UB           */
-    (conversion_fct_t) copy_bytes_1,             /* OPAL_DATATYPE_INT1         */
-    (conversion_fct_t) copy_bytes_2,             /* OPAL_DATATYPE_INT2         */
-    (conversion_fct_t) copy_bytes_4,             /* OPAL_DATATYPE_INT4         */
-    (conversion_fct_t) copy_bytes_8,             /* OPAL_DATATYPE_INT8         */
-    (conversion_fct_t) copy_bytes_16,            /* OPAL_DATATYPE_INT16        */
-    (conversion_fct_t) copy_bytes_1,             /* OPAL_DATATYPE_UINT1        */
-    (conversion_fct_t) copy_bytes_2,             /* OPAL_DATATYPE_UINT2        */
-    (conversion_fct_t) copy_bytes_4,             /* OPAL_DATATYPE_UINT4        */
-    (conversion_fct_t) copy_bytes_8,             /* OPAL_DATATYPE_UINT8        */
-    (conversion_fct_t) copy_bytes_16,            /* OPAL_DATATYPE_UINT16       */
-    (conversion_fct_t) copy_float_2,             /* OPAL_DATATYPE_FLOAT2       */
-    (conversion_fct_t) copy_float_4,             /* OPAL_DATATYPE_FLOAT4       */
-    (conversion_fct_t) copy_float_8,             /* OPAL_DATATYPE_FLOAT8       */
-    (conversion_fct_t) copy_float_12,            /* OPAL_DATATYPE_FLOAT12       */
-    (conversion_fct_t) copy_float_16,            /* OPAL_DATATYPE_FLOAT16      */
-    (conversion_fct_t) copy_short_float_complex, /* OPAL_DATATYPE_SHORT_FLOAT_COMPLEX */
-    (conversion_fct_t) copy_float_complex,       /* OPAL_DATATYPE_FLOAT_COMPLEX */
-    (conversion_fct_t) copy_double_complex,      /* OPAL_DATATYPE_DOUBLE_COMPLEX */
-    (conversion_fct_t) copy_long_double_complex, /* OPAL_DATATYPE_LONG_DOUBLE_COMPLEX */
-    (conversion_fct_t) copy_bool,                /* OPAL_DATATYPE_BOOL         */
-    (conversion_fct_t) copy_wchar,               /* OPAL_DATATYPE_WCHAR        */
-    (conversion_fct_t) NULL                      /* OPAL_DATATYPE_UNAVAILABLE  */
+    [OPAL_DATATYPE_LOOP]                =     (conversion_fct_t) NULL,
+    [OPAL_DATATYPE_END_LOOP]            =     (conversion_fct_t) NULL,
+    [OPAL_DATATYPE_LB]                  =     (conversion_fct_t) NULL,
+    [OPAL_DATATYPE_UB]                  =     (conversion_fct_t) NULL,
+    [OPAL_DATATYPE_INT1]                =     (conversion_fct_t) copy_bytes_1,
+    [OPAL_DATATYPE_INT2]                =     (conversion_fct_t) copy_bytes_2,
+    [OPAL_DATATYPE_INT4]                =     (conversion_fct_t) copy_bytes_4,
+    [OPAL_DATATYPE_INT8]                =     (conversion_fct_t) copy_bytes_8,
+    [OPAL_DATATYPE_INT16]               =     (conversion_fct_t) copy_bytes_16,
+    [OPAL_DATATYPE_UINT1]               =     (conversion_fct_t) copy_bytes_1,
+    [OPAL_DATATYPE_UINT2]               =     (conversion_fct_t) copy_bytes_2,
+    [OPAL_DATATYPE_UINT4]               =     (conversion_fct_t) copy_bytes_4,
+    [OPAL_DATATYPE_UINT8]               =     (conversion_fct_t) copy_bytes_8,
+    [OPAL_DATATYPE_UINT16]              =     (conversion_fct_t) copy_bytes_16,
+    [OPAL_DATATYPE_FLOAT2]              =     (conversion_fct_t) copy_float_2,
+    [OPAL_DATATYPE_FLOAT4]              =     (conversion_fct_t) copy_float_4,
+    [OPAL_DATATYPE_FLOAT8]              =     (conversion_fct_t) copy_float_8,
+    [OPAL_DATATYPE_FLOAT12]             =     (conversion_fct_t) copy_float_12,
+    [OPAL_DATATYPE_FLOAT16]             =     (conversion_fct_t) copy_float_16,
+    [OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] =     (conversion_fct_t) copy_short_float_complex,
+    [OPAL_DATATYPE_FLOAT_COMPLEX]       =     (conversion_fct_t) copy_float_complex,
+    [OPAL_DATATYPE_DOUBLE_COMPLEX]      =     (conversion_fct_t) copy_double_complex,
+    [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] =     (conversion_fct_t) copy_long_double_complex,
+    [OPAL_DATATYPE_BOOL]                =     (conversion_fct_t) copy_bool,
+    [OPAL_DATATYPE_WCHAR]               =     (conversion_fct_t) copy_wchar,
+#if SIZEOF_LONG == 4
+    [OPAL_DATATYPE_LONG]                =     (conversion_fct_t) copy_bytes_4,
+    [OPAL_DATATYPE_UNSIGNED_LONG]       =     (conversion_fct_t) copy_bytes_4,
+#elif SIZEOF_LONG == 8
+    [OPAL_DATATYPE_LONG]                =     (conversion_fct_t) copy_bytes_8,
+    [OPAL_DATATYPE_UNSIGNED_LONG]       =     (conversion_fct_t) copy_bytes_8,
+#endif /* any other SIZEOF_LONG leaves both entries zero-initialized (NULL) */
+    [OPAL_DATATYPE_UNAVAILABLE]         =     (conversion_fct_t) NULL,
 };