3
3
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4
4
* University Research and Technology
5
5
* Corporation. All rights reserved.
6
- * Copyright (c) 2004-2016 The University of Tennessee and The University
6
+ * Copyright (c) 2004-2017 The University of Tennessee and The University
7
7
* of Tennessee Research Foundation. All rights
8
8
* reserved.
9
9
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
43
43
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
44
44
#endif
45
45
46
- extern int opal_convertor_create_stack_with_pos_general ( opal_convertor_t * convertor ,
47
- int starting_point , const int * sizes );
48
-
49
46
static void opal_convertor_construct ( opal_convertor_t * convertor )
50
47
{
51
48
convertor -> pStack = convertor -> static_stack ;
@@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv,
226
223
if ( OPAL_LIKELY (pConv -> flags & CONVERTOR_NO_OP ) ) {
227
224
/**
228
225
* We are doing conversion on a contiguous datatype on a homogeneous
229
- * environment. The convertor contain minimal informations , we only
226
+ * environment. The convertor contains minimal information, we only
230
227
* use the bConverted to manage the conversion.
231
228
*/
232
229
uint32_t i ;
@@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
447
444
return rc ;
448
445
}
449
446
447
+ static size_t
448
+ opal_datatype_compute_remote_size ( const opal_datatype_t * pData ,
449
+ const size_t * sizes )
450
+ {
451
+ uint32_t typeMask = pData -> bdt_used ;
452
+ size_t length = 0 ;
453
+
454
+ if ( OPAL_UNLIKELY (NULL == pData -> ptypes ) ) {
455
+ /* Allocate and fill the array of types used in the datatype description */
456
+ opal_datatype_compute_ptypes ( (opal_datatype_t * )pData );
457
+ }
458
+
459
+ for ( int i = OPAL_DATATYPE_FIRST_TYPE ; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED ); i ++ ) {
460
+ if ( typeMask & ((uint32_t )1 << i ) ) {
461
+ length += (pData -> ptypes [i ] * sizes [i ]);
462
+ typeMask ^= ((uint32_t )1 << i );
463
+ }
464
+ }
465
+ return length ;
466
+ }
450
467
451
468
/**
452
469
* Compute the remote size. If necessary remove the homogeneous flag
453
470
* and redirect the convertor description toward the non-optimized
454
471
* datatype representation.
455
472
*/
456
- #define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE (convertor , datatype , bdt_mask ) \
457
- { \
458
- if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
459
- opal_convertor_master_t* master; \
460
- int i; \
461
- uint32_t mask = datatype->bdt_used; \
462
- convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
463
- master = convertor->master; \
464
- convertor->remote_size = 0; \
465
- for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
466
- if( mask & ((uint32_t)1 << i) ) { \
467
- convertor->remote_size += (datatype->btypes[i] * \
468
- master->remote_sizes[i]); \
469
- mask ^= ((uint32_t)1 << i); \
470
- } \
471
- } \
472
- convertor->remote_size *= convertor->count; \
473
- convertor->use_desc = &(datatype->desc); \
474
- } \
473
+ size_t opal_convertor_compute_remote_size ( opal_convertor_t * pConvertor )
474
+ {
475
+ opal_datatype_t * datatype = (opal_datatype_t * )pConvertor -> pDesc ;
476
+
477
+ pConvertor -> remote_size = pConvertor -> local_size ;
478
+ if ( OPAL_UNLIKELY (datatype -> bdt_used & pConvertor -> master -> hetero_mask ) ) {
479
+ pConvertor -> flags &= (~CONVERTOR_HOMOGENEOUS );
480
+ pConvertor -> use_desc = & (datatype -> desc );
481
+ if ( 0 == (pConvertor -> flags & CONVERTOR_HAS_REMOTE_SIZE ) ) {
482
+ /* This is for a single datatype, we must update it with the count */
483
+ pConvertor -> remote_size = opal_datatype_compute_remote_size (datatype ,
484
+ pConvertor -> master -> remote_sizes );
485
+ pConvertor -> remote_size *= pConvertor -> count ;
486
+ }
487
+ }
488
+ pConvertor -> flags |= CONVERTOR_HAS_REMOTE_SIZE ;
489
+ return pConvertor -> remote_size ;
475
490
}
476
491
477
492
/**
@@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
483
498
*/
484
499
#define OPAL_CONVERTOR_PREPARE ( convertor , datatype , count , pUserBuf ) \
485
500
{ \
486
- uint32_t bdt_mask; \
487
- \
501
+ convertor->local_size = count * datatype->size; \
502
+ convertor->pBaseBuf = (unsigned char*)pUserBuf; \
503
+ convertor->count = count; \
504
+ convertor->pDesc = (opal_datatype_t*)datatype; \
505
+ convertor->bConverted = 0; \
506
+ convertor->use_desc = &(datatype->opt_desc); \
488
507
/* If the data is empty we just mark the convertor as \
489
508
* completed. With this flag set the pack and unpack functions \
490
509
* will not do anything. \
491
510
*/ \
492
511
if ( OPAL_UNLIKELY ((0 == count ) || (0 == datatype -> size )) ) { \
493
- convertor -> flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED ; \
512
+ convertor -> flags |= ( OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE ); \
494
513
convertor -> local_size = convertor -> remote_size = 0 ; \
495
514
return OPAL_SUCCESS ; \
496
515
} \
497
- /* Compute the local in advance */ \
498
- convertor -> local_size = count * datatype -> size ; \
499
- convertor -> pBaseBuf = (unsigned char * )pUserBuf ; \
500
- convertor -> count = count ; \
501
516
\
502
517
/* Grab the datatype part of the flags */ \
503
518
convertor -> flags &= CONVERTOR_TYPE_MASK ; \
504
519
convertor -> flags |= (CONVERTOR_DATATYPE_MASK & datatype -> flags ); \
505
520
convertor -> flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS ); \
506
- convertor -> pDesc = (opal_datatype_t * )datatype ; \
507
- convertor -> bConverted = 0 ; \
508
- convertor -> use_desc = & (datatype -> opt_desc ); \
509
521
\
510
522
convertor -> remote_size = convertor -> local_size ; \
511
523
if ( OPAL_LIKELY (convertor -> remoteArch == opal_local_arch ) ) { \
@@ -516,9 +528,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
516
528
} \
517
529
} \
518
530
\
519
- bdt_mask = datatype -> bdt_used & convertor -> master -> hetero_mask ; \
520
- OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE ( convertor , datatype , \
521
- bdt_mask ); \
531
+ assert ( (convertor )-> pDesc == (datatype ) ); \
532
+ opal_convertor_compute_remote_size ( convertor ); \
522
533
assert ( NULL != convertor -> use_desc -> desc ); \
523
534
/* For predefined datatypes (contiguous) do nothing more */ \
524
535
/* if checksum is enabled then always continue */ \
@@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
530
541
} \
531
542
convertor -> flags &= ~CONVERTOR_NO_OP ; \
532
543
{ \
533
- uint32_t required_stack_length = datatype -> btypes [ OPAL_DATATYPE_LOOP ] + 1 ; \
544
+ uint32_t required_stack_length = datatype -> loops + 1 ; \
534
545
\
535
546
if ( required_stack_length > convertor -> stack_size ) { \
536
547
assert (convertor -> pStack == convertor -> static_stack ); \
@@ -714,8 +725,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
714
725
opal_output ( 0 , "%d: pos %d count %d disp %ld " , stack_pos , pStack [stack_pos ].index ,
715
726
(int )pStack [stack_pos ].count , (long )pStack [stack_pos ].disp );
716
727
if ( pStack -> index != -1 )
717
- opal_output ( 0 , "\t[desc count %d disp %ld extent %ld]\n" ,
718
- pDesc [pStack [stack_pos ].index ].elem .count ,
728
+ opal_output ( 0 , "\t[desc count %lu disp %ld extent %ld]\n" ,
729
+ ( unsigned long ) pDesc [pStack [stack_pos ].index ].elem .count ,
719
730
(long )pDesc [pStack [stack_pos ].index ].elem .disp ,
720
731
(long )pDesc [pStack [stack_pos ].index ].elem .extent );
721
732
else
0 commit comments