From 20bd5f900f0f21f5e28d310db7ab14b1b9c4d8ed Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 18 Aug 2017 17:14:36 +0200 Subject: [PATCH 01/31] Initial approach towards DART data types for advanced memory access patterns --- dart-if/include/dash/dart/if/dart_types.h | 94 ++++++++++++++- .../dash/dart/mpi/dart_communication_priv.h | 110 +++++++++++------- dart-impl/mpi/src/dart_communication.c | 106 +++++------------ 3 files changed, 186 insertions(+), 124 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_types.h b/dart-if/include/dash/dart/if/dart_types.h index 3ddb729b3..af69d198c 100644 --- a/dart-if/include/dash/dart/if/dart_types.h +++ b/dart-if/include/dash/dart/if/dart_types.h @@ -96,10 +96,11 @@ typedef enum * * \ingroup DartTypes */ +/** typedef enum { DART_TYPE_UNDEFINED = 0, - /// integral data types + /// integral data types DART_TYPE_BYTE, DART_TYPE_SHORT, DART_TYPE_INT, @@ -113,6 +114,35 @@ typedef enum /// Reserved, do not use! DART_TYPE_COUNT } dart_datatype_t; +**/ + +typedef intptr_t dart_datatype_t; +#if 0 +extern struct dart_datatype_struct __dart_type_undefined_t; +extern struct dart_datatype_struct __dart_type_byte_t; +extern struct dart_datatype_struct __dart_type_short_t; +extern struct dart_datatype_struct __dart_type_int_t; +extern struct dart_datatype_struct __dart_type_uint_t; +extern struct dart_datatype_struct __dart_type_long_t; +extern struct dart_datatype_struct __dart_type_ulong_t; +extern struct dart_datatype_struct __dart_type_longlong_t; +extern struct dart_datatype_struct __dart_type_float_t; +extern struct dart_datatype_struct __dart_type_double_t; +#endif + + +#define DART_TYPE_UNDEFINED (dart_datatype_t)(0) +#define DART_TYPE_BYTE (dart_datatype_t)(1) +#define DART_TYPE_SHORT (dart_datatype_t)(2) +#define DART_TYPE_INT (dart_datatype_t)(3) +#define DART_TYPE_UINT (dart_datatype_t)(4) +#define DART_TYPE_LONG (dart_datatype_t)(5) +#define DART_TYPE_ULONG (dart_datatype_t)(6) +#define DART_TYPE_LONGLONG (dart_datatype_t)(7) +#define DART_TYPE_FLOAT (dart_datatype_t)(8) +#define DART_TYPE_DOUBLE (dart_datatype_t)(9) +#define DART_TYPE_LAST (dart_datatype_t)(10) + /** size for integral \c size_t */ #if (UINT32_MAX == SIZE_MAX) @@ -236,11 +266,11 @@ typedef int16_t dart_team_t; /** * Levels of thread-support offered by DART. - * \ref DART_THREAD_MULTIPLE is supported if + * \ref DART_THREAD_MULTIPLE is supported if * DART has been build with \c DART_ENABLE_THREADSUPPORT - * and the underlying communication backend supports + * and the underlying communication backend supports * thread-safe access. - * + * */ typedef enum { @@ -660,6 +690,62 @@ typedef struct } dart_config_t; +/** + * Create a strided data type using blocks of size \c blocklen and a stride + * of \c stride. + * + * \param basetype The type of elements in the blocks. + * \param stride The stride between blocks. + * \param blocklen The number of elements of type \c basetype in each block. + * \param[out] newtype The newly created data type. + * + * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. + * + * \ingroup DartTypes + */ +dart_ret_t +dart_type_create_strided( + dart_datatype_t basetype, + size_t stride, + size_t blocklen, + dart_datatype_t * newtype); + + +/** + * Create an indexed data type using \c count blocks of size \c blocklen[i] + * with offsets \c offset[i] for each 0 <= i < count. + * + * \param basetype The type of elements in the blocks. + * \param count The number of blocks. + * \param blocklen The number of elements of type \c basetype in block[i]. + * \param offset The offset of block[i]. + * \param[out] newtype The newly created data type. + * + * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. + * + * \ingroup DartTypes + */ +dart_ret_t +dart_type_create_indexed( + dart_datatype_t basetype, + size_t count, + const size_t blocklen[], + const size_t offset[], + dart_datatype_t * newtype); + +/** + * Destroy a data type that was previously created using + * \ref dart_type_create_strided or \ref dart_type_create_indexed. + * + * \param dart_type The type to be destroyed. + * + * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. + * + * \ingroup DartTypes + */ +dart_ret_t +dart_type_destroy(dart_datatype_t *dart_type); + /** \cond DART_HIDDEN_SYMBOLS */ #define DART_INTERFACE_OFF /** \endcond */ diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h index b487ef90e..4d65f79f7 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h @@ -14,19 +14,48 @@ #include #include #include +#include + + +/** + * The maximum number of elements of a certain type to be + * transfered in one chunk. + */ +#define MAX_CONTIG_ELEMENTS INT_MAX + +typedef enum { + DART_KIND_BASIC = 0, + DART_KIND_STRIDED, + DART_KIND_INDEXED +} dart_type_kind_t; + +typedef struct dart_datatype_struct { + MPI_Datatype mpi_type; + dart_datatype_t base_type; + dart_type_kind_t kind; + union { + // used for basic types + struct { + size_t size; + MPI_Datatype max_contig_type; + } basic; + // used for DART_KIND_STRIDED + struct { + size_t stride; + size_t blocklen; + } strided; + // used for DART_KIND_INDEXED + struct { + int * blocklens; + int * offsets; + int count; + } indexed; + }; +} dart_datatype_struct_t; DART_INTERNAL -extern int dart__mpi__datatype_sizes[DART_TYPE_COUNT]; +extern dart_datatype_struct_t __dart_base_types[DART_TYPE_LAST]; -/** DART handle type for non-blocking one-sided operations. */ -struct dart_handle_struct -{ - MPI_Request reqs[2]; // a large transfer might consist of two operations - MPI_Win win; - dart_unit_t dest; - uint8_t num_reqs; - bool needs_flush; -}; dart_ret_t dart__mpi__datatype_init() DART_INTERNAL; @@ -34,7 +63,7 @@ dart__mpi__datatype_init() DART_INTERNAL; dart_ret_t dart__mpi__datatype_fini() DART_INTERNAL; -static inline MPI_Op dart__mpi__op(dart_operation_t dart_op) { +DART_INLINE MPI_Op dart__mpi__op(dart_operation_t dart_op) { switch (dart_op) { case DART_OP_MIN : return MPI_MIN; case DART_OP_MAX : return MPI_MAX; @@ -52,46 +81,37 @@ static inline MPI_Op dart__mpi__op(dart_operation_t dart_op) { } } -static inline MPI_Datatype dart__mpi__datatype(dart_datatype_t dart_datatype) { - switch (dart_datatype) { - case DART_TYPE_BYTE : return MPI_BYTE; - case DART_TYPE_SHORT : return MPI_SHORT; - case DART_TYPE_INT : return MPI_INT; - case DART_TYPE_UINT : return MPI_UNSIGNED; - case DART_TYPE_LONG : return MPI_LONG; - case DART_TYPE_ULONG : return MPI_UNSIGNED_LONG; - case DART_TYPE_LONGLONG : return MPI_LONG_LONG_INT; - case DART_TYPE_FLOAT : return MPI_FLOAT; - case DART_TYPE_DOUBLE : return MPI_DOUBLE; - default : return (MPI_Datatype)(-1); - } +DART_INLINE +dart_datatype_struct_t * dart__mpi__datatype_struct( + dart_datatype_t dart_datatype) +{ + return (dart_datatype < DART_TYPE_LAST) + ? &__dart_base_types[dart_datatype] + : (dart_datatype_struct_t *)dart_datatype; } -static inline int dart__mpi__datatype_sizeof(dart_datatype_t dart_datatype) { +DART_INLINE +MPI_Datatype dart__mpi__datatype(dart_datatype_t dart_datatype) { + return dart__mpi__datatype_struct(dart_datatype)->mpi_type; +} - if (dart_datatype > DART_TYPE_UNDEFINED && dart_datatype < DART_TYPE_COUNT) - { - return dart__mpi__datatype_sizes[dart_datatype]; - } - return -1; +DART_INLINE +int dart__mpi__datatype_sizeof(dart_datatype_t dart_type) { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + return (dts->kind == DART_KIND_BASIC) ? dts->basic.size : -1; } +DART_INLINE +bool dart__mpi__datatype_isbasic(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_BASIC); +} -#if 0 -static inline int dart_mpi_datatype_disp_unit(dart_datatype_t dart_datatype) { - switch (dart_datatype) { - case DART_TYPE_BYTE : return 1; - case DART_TYPE_SHORT : return 1; - case DART_TYPE_INT : return 4; - case DART_TYPE_UINT : return 4; - case DART_TYPE_LONG : return 4; - case DART_TYPE_ULONG : return 4; - case DART_TYPE_LONGLONG : return 8; - case DART_TYPE_FLOAT : return 4; - case DART_TYPE_DOUBLE : return 8; - default : return 1; - } +DART_INLINE +MPI_Datatype dart__mpi__datatype_contigtype(dart_datatype_t dart_type) { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + return (dts->kind == DART_KIND_BASIC) + ? dts->basic.max_contig_type : MPI_DATATYPE_NULL; } -#endif + #endif /* DART_ADAPT_COMMUNICATION_PRIV_H_INCLUDED */ diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 34e2d6a24..b6611fbf0 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -30,11 +30,6 @@ #include #include -/** - * The maximum number of elements of a certain type to be - * transfered in one chunk. - */ -#define MAX_CONTIG_ELEMENTS INT_MAX #define CHECK_UNITID_RANGE(_unitid, _team_data) \ do { \ @@ -45,7 +40,6 @@ } \ } while (0) - /** * Temporary space allocation: * - on the stack for allocations <=64B @@ -62,53 +56,15 @@ free(__ptr); \ } while (0) -int dart__mpi__datatype_sizes[DART_TYPE_COUNT]; -static MPI_Datatype dart__mpi__max_chunk_datatype[DART_TYPE_COUNT]; - -dart_ret_t -dart__mpi__datatype_init() +/** DART handle type for non-blocking one-sided operations. */ +struct dart_handle_struct { - for (int i = DART_TYPE_UNDEFINED+1; i < DART_TYPE_COUNT; i++) { - - // query the size of the data type - int ret = MPI_Type_size( - dart__mpi__datatype(i), - &dart__mpi__datatype_sizes[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to query size of DART data type %i", i); - return DART_ERR_INVAL; - } - - // create the chunk data type - ret = MPI_Type_contiguous(MAX_CONTIG_ELEMENTS, - dart__mpi__datatype(i), - &dart__mpi__max_chunk_datatype[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to create chunk type of DART data type %i", i); - return DART_ERR_INVAL; - } - ret = MPI_Type_commit(&dart__mpi__max_chunk_datatype[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to commit chunk type of DART data type %i", i); - return DART_ERR_INVAL; - } - - } - return DART_OK; -} - -dart_ret_t -dart__mpi__datatype_fini() -{ - for (int i = DART_TYPE_UNDEFINED+1; i < DART_TYPE_COUNT; i++) { - int ret = MPI_Type_free(&dart__mpi__max_chunk_datatype[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to commit chunk type of DART data type %i", i); - return DART_ERR_INVAL; - } - } - return DART_OK; -} + MPI_Request reqs[2]; // a large transfer might consist of two operations + MPI_Win win; + dart_unit_t dest; + uint8_t num_reqs; + bool needs_flush; +}; #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) static dart_ret_t get_shared_mem( @@ -241,11 +197,11 @@ dart_ret_t dart_get( dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Get(dest_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), win) != MPI_SUCCESS) { DART_LOG_ERROR("dart_get ! MPI_Get failed"); return DART_ERR_INVAL; @@ -341,11 +297,11 @@ dart_ret_t dart_put( src_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Put(src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), win) != MPI_SUCCESS) { DART_LOG_ERROR("dart_put ! MPI_Put failed"); return DART_ERR_INVAL; @@ -433,11 +389,11 @@ dart_ret_t dart_accumulate( if (MPI_Accumulate( src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), mpi_op, win) != MPI_SUCCESS) { DART_LOG_ERROR("MPI_Accumulate ! MPI_Put failed"); @@ -484,7 +440,7 @@ dart_ret_t dart_fetch_and_op( int16_t seg_id = gptr.segid; mpi_dtype = dart__mpi__datatype(dtype); mpi_op = dart__mpi__op(op); - + dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); if (team_data == NULL) { DART_LOG_ERROR("dart_fetch_and_op ! failed: Unknown team %i!", @@ -688,11 +644,11 @@ dart_ret_t dart_get_handle( dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Rget(dest_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), win, &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { free(handle); @@ -743,7 +699,7 @@ dart_ret_t dart_put_handle( MPI_Win win; *handleptr = DART_HANDLE_NULL; - + dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); if (team_data == NULL) { DART_LOG_ERROR("dart_put_handle ! failed: Unknown team %i!", gptr.teamid); @@ -793,11 +749,11 @@ dart_ret_t dart_put_handle( src_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Rput(src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), win, &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { free(handle); @@ -960,11 +916,11 @@ dart_ret_t dart_put_blocking( src_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Put(src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), win) != MPI_SUCCESS) { DART_LOG_ERROR("dart_put_blocking ! MPI_Put failed"); return DART_ERR_INVAL; @@ -1105,11 +1061,11 @@ dart_ret_t dart_get_blocking( dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Rget(dest_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), win, &reqs[nreqs++]) != MPI_SUCCESS) { DART_LOG_ERROR("dart_get ! MPI_Get failed"); @@ -1693,7 +1649,7 @@ dart_ret_t dart_bcast( if (nchunks > 0) { if (MPI_Bcast(src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_contigtype(dtype), root.id, comm) != MPI_SUCCESS) { DART_LOG_ERROR("dart_bcast ! root:%d -> team:%d " "MPI_Bcast failed", root.id, teamid); @@ -1740,7 +1696,7 @@ dart_ret_t dart_scatter( MPI_Comm comm = team_data->comm; if (nchunks > 0) { - MPI_Datatype mpi_dtype = dart__mpi__max_chunk_datatype[dtype]; + MPI_Datatype mpi_dtype = dart__mpi__datatype_contigtype(dtype); if (MPI_Scatter( send_ptr, nchunks, @@ -1806,7 +1762,7 @@ dart_ret_t dart_gather( MPI_Comm comm = team_data->comm; if (nchunks > 0) { - MPI_Datatype mpi_dtype = dart__mpi__max_chunk_datatype[dtype]; + MPI_Datatype mpi_dtype = dart__mpi__datatype_contigtype(dtype); if (MPI_Gather( send_ptr, nchunks, @@ -1873,7 +1829,7 @@ dart_ret_t dart_allgather( MPI_Comm comm = team_data->comm; if (nchunks > 0) { - MPI_Datatype mpi_dtype = dart__mpi__max_chunk_datatype[dtype]; + MPI_Datatype mpi_dtype = dart__mpi__datatype_contigtype(dtype); if (MPI_Allgather( send_ptr, nchunks, @@ -1947,8 +1903,8 @@ dart_ret_t dart_allgatherv( int *inrecvcounts = malloc(sizeof(int) * comm_size); int *irecvdispls = malloc(sizeof(int) * comm_size); for (int i = 0; i < comm_size; i++) { - if (nrecvcounts[i] > MAX_CONTIG_ELEMENTS || - recvdispls[i] > MAX_CONTIG_ELEMENTS) + if (nrecvcounts[i] > MAX_CONTIG_ELEMENTS || + recvdispls[i] > MAX_CONTIG_ELEMENTS) { DART_LOG_ERROR( "dart_allgatherv ! failed: nrecvcounts[%i] (%zu) > INT_MAX || " @@ -2047,7 +2003,7 @@ dart_ret_t dart_reduce( DART_LOG_ERROR("dart_reduce ! unknown teamid %d", team); return DART_ERR_INVAL; } - + CHECK_UNITID_RANGE(root, team_data); comm = team_data->comm; From 77d5fe49eb395e6ac66f9785e995854410d11dda Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 21 Aug 2017 11:07:06 +0200 Subject: [PATCH 02/31] Add missing file --- dart-impl/mpi/src/dart_mpi_types.c | 221 +++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 dart-impl/mpi/src/dart_mpi_types.c diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c new file mode 100644 index 000000000..226d1d77b --- /dev/null +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -0,0 +1,221 @@ +/** + * \file dart_mpi_types.c + * + * Provide functionality for creating derived data types in DART. + * + * Currently implemented: strided types based on basic types. + */ + +#include +#include +#include +#include + +#include +#include +#include + +dart_datatype_struct_t __dart_base_types[DART_TYPE_LAST]; + +static void +init_basic_datatype( + dart_datatype_t dart_type_id, + MPI_Datatype mpi_type) +{ + int size; + struct dart_datatype_struct *dart_type = &__dart_base_types[dart_type_id]; + dart_type->base_type = DART_TYPE_UNDEFINED; + dart_type->mpi_type = mpi_type; + dart_type->kind = DART_KIND_BASIC; + int ret = MPI_Type_size(mpi_type, &size); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to query size of MPI data type!"); + dart_abort(-1); + } + dart_type->basic.size = size; + + if (mpi_type != MPI_DATATYPE_NULL) { + // create the chunk data type + MPI_Datatype max_contig_type; + ret = MPI_Type_contiguous(MAX_CONTIG_ELEMENTS, + mpi_type, + &max_contig_type); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to create chunk type of DART data type"); + dart_abort(-1); + } + ret = MPI_Type_commit(&max_contig_type); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to commit chunk type of DART data type"); + dart_abort(-1); + } + dart_type->basic.max_contig_type = max_contig_type; + } + +} + +dart_ret_t +dart__mpi__datatype_init() +{ + init_basic_datatype(DART_TYPE_UNDEFINED, MPI_DATATYPE_NULL); + init_basic_datatype(DART_TYPE_BYTE, MPI_BYTE); + init_basic_datatype(DART_TYPE_SHORT, MPI_SHORT); + init_basic_datatype(DART_TYPE_INT, MPI_INT); + init_basic_datatype(DART_TYPE_UINT, MPI_UNSIGNED); + init_basic_datatype(DART_TYPE_LONG, MPI_LONG); + init_basic_datatype(DART_TYPE_ULONG, MPI_UNSIGNED_LONG); + init_basic_datatype(DART_TYPE_LONGLONG, MPI_LONG_LONG); + init_basic_datatype(DART_TYPE_FLOAT, MPI_FLOAT); + init_basic_datatype(DART_TYPE_DOUBLE, MPI_DOUBLE); + + return DART_OK; +} + +dart_ret_t +dart_type_create_strided( + dart_datatype_t basetype_id, + size_t stride, + size_t blocklen, + dart_datatype_t * newtype) +{ + *newtype = DART_TYPE_UNDEFINED; + + dart_datatype_struct_t *basetype = dart__mpi__datatype_struct(basetype_id); + + if (basetype->kind != DART_KIND_BASIC) { + DART_LOG_ERROR("Only basic data types allowed in strided datatypes!"); + return DART_ERR_INVAL; + } + + MPI_Datatype mpi_dtype = basetype->mpi_type; + MPI_Datatype new_mpi_dtype; + MPI_Type_vector(1, blocklen, stride, mpi_dtype, &new_mpi_dtype); + MPI_Type_commit(&new_mpi_dtype); + dart_datatype_struct_t *new_struct; + new_struct = malloc(sizeof(struct dart_datatype_struct)); + new_struct->mpi_type = new_mpi_dtype; + new_struct->base_type = basetype_id; + new_struct->kind = DART_KIND_STRIDED; + new_struct->strided.blocklen = blocklen; + new_struct->strided.stride = stride; + + *newtype = (dart_datatype_t)new_struct; + + return DART_OK; +} + + +dart_ret_t +dart_type_create_indexed( + dart_datatype_t basetype, + size_t count, + const size_t blocklen[], + const size_t offset[], + dart_datatype_t * newtype) +{ + *newtype = DART_TYPE_UNDEFINED; + + if (dart__mpi__datatype_struct(basetype)->kind != DART_KIND_BASIC) { + DART_LOG_ERROR("Only basic data types allowed in indexed datatypes!"); + return DART_ERR_INVAL; + } + + // check for overflows + if (count > INT_MAX) { + DART_LOG_ERROR("dart_type_create_indexed: count > INT_MAX"); + return DART_ERR_INVAL; + } + + int *mpi_blocklen = malloc(sizeof(int) * count); + int *mpi_disps = malloc(sizeof(int) * count); + + for (size_t i = 0; i < count; ++i) { + if (blocklen[i] > INT_MAX) { + DART_LOG_ERROR("dart_type_create_indexed: blocklen[%zu] > INT_MAX", i); + free(mpi_blocklen); + free(mpi_disps); + return DART_ERR_INVAL; + } + if (offset[i] > INT_MAX) { + DART_LOG_ERROR("dart_type_create_indexed: offset[%zu] > INT_MAX", i); + free(mpi_blocklen); + free(mpi_disps); + return DART_ERR_INVAL; + } + mpi_blocklen[i] = blocklen[i]; + mpi_disps[i] = offset[i]; + } + + MPI_Datatype mpi_base_type = dart__mpi__datatype_struct(basetype)->mpi_type; + MPI_Datatype mpi_dtype; + int ret = MPI_Type_indexed( + count, mpi_blocklen, mpi_disps, mpi_base_type, &mpi_dtype); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("dart_type_create_indexed: failed to create indexed type!"); + free(mpi_blocklen); + free(mpi_disps); + return DART_ERR_INVAL; + } + + MPI_Type_commit(&mpi_dtype); + dart_datatype_struct_t *new_struct; + new_struct = malloc(sizeof(struct dart_datatype_struct)); + new_struct->base_type = basetype; + new_struct->mpi_type = mpi_dtype; + new_struct->kind = DART_KIND_INDEXED; + new_struct->indexed.blocklens = mpi_blocklen; + new_struct->indexed.offsets = mpi_disps; + + *newtype = (dart_datatype_t)new_struct; + + return DART_OK; +} + +dart_ret_t +dart_type_destroy(dart_datatype_t *dart_type_ptr) +{ + if (dart_type_ptr == NULL) { + return DART_ERR_INVAL; + } + + dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(*dart_type_ptr); + + if (dart_type->kind == DART_KIND_BASIC) { + DART_LOG_ERROR("dart_type_destroy: Cannot destroy basic type!"); + return DART_ERR_INVAL; + } + + if (dart_type->kind == DART_KIND_INDEXED) { + free(dart_type->indexed.blocklens); + dart_type->indexed.blocklens = NULL; + free(dart_type->indexed.offsets); + dart_type->indexed.offsets = NULL; + } + free(dart_type); + *dart_type_ptr = DART_TYPE_UNDEFINED; + + return DART_OK; +} + +static void destroy_basic_type(dart_datatype_t dart_type_id) +{ + dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(dart_type_id); + MPI_Type_free(&dart_type->basic.max_contig_type); + dart_type->basic.max_contig_type = MPI_DATATYPE_NULL; +} + +dart_ret_t +dart__mpi__datatype_fini() +{ + destroy_basic_type(DART_TYPE_BYTE); + destroy_basic_type(DART_TYPE_SHORT); + destroy_basic_type(DART_TYPE_INT); + destroy_basic_type(DART_TYPE_UINT); + destroy_basic_type(DART_TYPE_LONG); + destroy_basic_type(DART_TYPE_ULONG); + destroy_basic_type(DART_TYPE_LONGLONG); + destroy_basic_type(DART_TYPE_FLOAT); + destroy_basic_type(DART_TYPE_DOUBLE); + + return DART_OK; +} From 0cceb1395ac5ab95b12b9d93f6a599692ad8bdbd Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 11:04:29 +0900 Subject: [PATCH 03/31] Create a max_type for every type, not just basic --- .../dash/dart/mpi/dart_communication_priv.h | 7 +- dart-impl/mpi/src/dart_mpi_types.c | 82 +++++++++++++------ 2 files changed, 62 insertions(+), 27 deletions(-) diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h index 4d65f79f7..354b417a6 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h @@ -33,11 +33,11 @@ typedef struct dart_datatype_struct { MPI_Datatype mpi_type; dart_datatype_t base_type; dart_type_kind_t kind; + MPI_Datatype max_type; union { // used for basic types struct { size_t size; - MPI_Datatype max_contig_type; } basic; // used for DART_KIND_STRIDED struct { @@ -107,10 +107,9 @@ bool dart__mpi__datatype_isbasic(dart_datatype_t dart_type) { } DART_INLINE -MPI_Datatype dart__mpi__datatype_contigtype(dart_datatype_t dart_type) { +MPI_Datatype dart__mpi__datatype_maxtype(dart_datatype_t dart_type) { dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); - return (dts->kind == DART_KIND_BASIC) - ? dts->basic.max_contig_type : MPI_DATATYPE_NULL; + return dts->max_type; } diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c index 226d1d77b..abc65b84a 100644 --- a/dart-impl/mpi/src/dart_mpi_types.c +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -17,6 +17,36 @@ dart_datatype_struct_t __dart_base_types[DART_TYPE_LAST]; +static +MPI_Datatype +create_max_datatype(MPI_Datatype mpi_type) +{ + MPI_Datatype max_type = MPI_DATATYPE_NULL; + if (mpi_type != MPI_DATATYPE_NULL) { + // create the chunk data type + int ret = MPI_Type_contiguous(MAX_CONTIG_ELEMENTS, + mpi_type, + &max_type); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to create chunk type of DART data type"); + dart_abort(-1); + } + ret = MPI_Type_commit(&max_type); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to commit chunk type of DART data type"); + dart_abort(-1); + } + } + return max_type; +} + +static void +destroy_max_type(dart_datatype_struct_t *dart_type) +{ + MPI_Type_free(&dart_type->max_type); + dart_type->max_type = MPI_DATATYPE_NULL; +} + static void init_basic_datatype( dart_datatype_t dart_type_id, @@ -34,23 +64,8 @@ init_basic_datatype( } dart_type->basic.size = size; - if (mpi_type != MPI_DATATYPE_NULL) { - // create the chunk data type - MPI_Datatype max_contig_type; - ret = MPI_Type_contiguous(MAX_CONTIG_ELEMENTS, - mpi_type, - &max_contig_type); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to create chunk type of DART data type"); - dart_abort(-1); - } - ret = MPI_Type_commit(&max_contig_type); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to commit chunk type of DART data type"); - dart_abort(-1); - } - dart_type->basic.max_contig_type = max_contig_type; - } + // create the type used for large transfers + dart_type->max_type = create_max_datatype(mpi_type); } @@ -78,6 +93,12 @@ dart_type_create_strided( size_t blocklen, dart_datatype_t * newtype) { + + if (newtype == NULL) { + DART_LOG_ERROR("newtype pointer may not be NULL!"); + return DART_ERR_INVAL; + } + *newtype = DART_TYPE_UNDEFINED; dart_datatype_struct_t *basetype = dart__mpi__datatype_struct(basetype_id); @@ -98,9 +119,13 @@ dart_type_create_strided( new_struct->kind = DART_KIND_STRIDED; new_struct->strided.blocklen = blocklen; new_struct->strided.stride = stride; + new_struct->max_type = create_max_datatype(new_mpi_dtype); *newtype = (dart_datatype_t)new_struct; + DART_LOG_TRACE("Created new strided data type %p (mpi_type %p)", + new_struct, new_mpi_dtype); + return DART_OK; } @@ -113,6 +138,11 @@ dart_type_create_indexed( const size_t offset[], dart_datatype_t * newtype) { + if (newtype == NULL) { + DART_LOG_ERROR("newtype pointer may not be NULL!"); + return DART_ERR_INVAL; + } + *newtype = DART_TYPE_UNDEFINED; if (dart__mpi__datatype_struct(basetype)->kind != DART_KIND_BASIC) { @@ -147,9 +177,9 @@ dart_type_create_indexed( } MPI_Datatype mpi_base_type = dart__mpi__datatype_struct(basetype)->mpi_type; - MPI_Datatype mpi_dtype; + MPI_Datatype new_mpi_dtype; int ret = MPI_Type_indexed( - count, mpi_blocklen, mpi_disps, mpi_base_type, &mpi_dtype); + count, mpi_blocklen, mpi_disps, mpi_base_type, &new_mpi_dtype); if (ret != MPI_SUCCESS) { DART_LOG_ERROR("dart_type_create_indexed: failed to create indexed type!"); free(mpi_blocklen); @@ -157,17 +187,21 @@ dart_type_create_indexed( return DART_ERR_INVAL; } - MPI_Type_commit(&mpi_dtype); + MPI_Type_commit(&new_mpi_dtype); dart_datatype_struct_t *new_struct; new_struct = malloc(sizeof(struct dart_datatype_struct)); new_struct->base_type = basetype; - new_struct->mpi_type = mpi_dtype; + new_struct->mpi_type = new_mpi_dtype; new_struct->kind = DART_KIND_INDEXED; new_struct->indexed.blocklens = mpi_blocklen; new_struct->indexed.offsets = mpi_disps; + new_struct->max_type = create_max_datatype(new_mpi_dtype); *newtype = (dart_datatype_t)new_struct; + DART_LOG_TRACE("Created new indexed data type %p (mpi_type %p)", + new_struct, new_mpi_dtype); + return DART_OK; } @@ -191,6 +225,9 @@ dart_type_destroy(dart_datatype_t *dart_type_ptr) free(dart_type->indexed.offsets); dart_type->indexed.offsets = NULL; } + + destroy_max_type(dart_type); + free(dart_type); *dart_type_ptr = DART_TYPE_UNDEFINED; @@ -200,8 +237,7 @@ dart_type_destroy(dart_datatype_t *dart_type_ptr) static void destroy_basic_type(dart_datatype_t dart_type_id) { dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(dart_type_id); - MPI_Type_free(&dart_type->basic.max_contig_type); - dart_type->basic.max_contig_type = MPI_DATATYPE_NULL; + destroy_max_type(dart_type); } dart_ret_t From 7d77bf6beae3d08c0844117d64eaa6a3797329c2 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 14:13:34 +0900 Subject: [PATCH 04/31] Add parameter num_blocks to dart_type_create_strided --- dart-if/include/dash/dart/if/dart_types.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_types.h b/dart-if/include/dash/dart/if/dart_types.h index af69d198c..cb9ae96eb 100644 --- a/dart-if/include/dash/dart/if/dart_types.h +++ b/dart-if/include/dash/dart/if/dart_types.h @@ -694,10 +694,12 @@ dart_config_t; * Create a strided data type using blocks of size \c blocklen and a stride * of \c stride. * - * \param basetype The type of elements in the blocks. - * \param stride The stride between blocks. - * \param blocklen The number of elements of type \c basetype in each block. - * \param[out] newtype The newly created data type. + * \param basetype The type of elements in the blocks. + * \param num_blocks The number of blocks of size \c blocklen separated by + * \c stride. + * \param stride The stride between blocks. + * \param blocklen The number of elements of type \c basetype in each block. + * \param[out] newtype The newly created data type. * * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -706,6 +708,7 @@ dart_config_t; dart_ret_t dart_type_create_strided( dart_datatype_t basetype, + size_t num_blocks, size_t stride, size_t blocklen, dart_datatype_t * newtype); From 390176b2a5aedeb4cbd6d610d3e14af61ce92bd2 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 14:16:01 +0900 Subject: [PATCH 05/31] Extend interface of dart_put* and dart_get* to include source and destination data types --- .../include/dash/dart/if/dart_communication.h | 32 +- .../dash/dart/mpi/dart_communication_priv.h | 20 +- dart-impl/mpi/src/dart_communication.c | 298 +++++++++++------- dart-impl/mpi/src/dart_mpi_types.c | 51 ++- dash/test/dart/DARTMemAllocTest.cc | 1 + dash/test/dart/DARTOnesidedTest.cc | 6 +- dash/test/dart/ThreadsafetyTest.cc | 2 +- dash/test/memory/GlobHeapMemTest.cc | 2 +- 8 files changed, 277 insertions(+), 135 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_communication.h b/dart-if/include/dash/dart/if/dart_communication.h index 072d506c9..d614c66d9 100644 --- a/dart-if/include/dash/dart/if/dart_communication.h +++ b/dart-if/include/dash/dart/if/dart_communication.h @@ -343,7 +343,8 @@ dart_ret_t dart_get( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** * 'REGULAR' variant of dart_put. @@ -366,7 +367,8 @@ dart_ret_t dart_put( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** @@ -476,7 +478,8 @@ dart_ret_t dart_get_handle( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handle) DART_NOTHROW; /** @@ -500,7 +503,8 @@ dart_ret_t dart_put_handle( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handle) DART_NOTHROW; /** @@ -628,7 +632,8 @@ dart_ret_t dart_get_blocking( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** * 'BLOCKING' variant of dart_put. @@ -648,7 +653,8 @@ dart_ret_t dart_put_blocking( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** \} */ @@ -679,8 +685,8 @@ dart_ret_t dart_send( const void * sendbuf, size_t nelem, dart_datatype_t dtype, - int tag, - dart_global_unit_t unit) DART_NOTHROW; + int tag, + dart_global_unit_t unit) DART_NOTHROW; /** * DART Equivalent to MPI recv. @@ -700,24 +706,24 @@ dart_ret_t dart_recv( void * recvbuf, size_t nelem, dart_datatype_t dtype, - int tag, - dart_global_unit_t unit) DART_NOTHROW; + int tag, + dart_global_unit_t unit) DART_NOTHROW; /** * DART Equivalent to MPI sendrecv. * - * \param sendbuf Buffer containing the data to be sent by the + * \param sendbuf Buffer containing the data to be sent by the * source unit. * \param send_nelem Number of values sentby the source unit. * \param send_dtype The data type of values in \c sendbuf. * \param dest Unitthe message is sent to. - * \param send_tag Message tag for the distinction between different + * \param send_tag Message tag for the distinction between different * messages of the source unit. * \param recvbuf Buffer for the incoming data. * \param recv_nelem Number of values received by the destination unit. * \param recv_dtype The data type of values in \c recvbuf. * \param src Unit sending the message. - * \param recv_tag Message tag for the distinction between different + * \param recv_tag Message tag for the distinction between different * messages of the destination unit. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h index 354b417a6..f17e6f383 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h @@ -41,8 +41,8 @@ typedef struct dart_datatype_struct { } basic; // used for DART_KIND_STRIDED struct { - size_t stride; - size_t blocklen; + int stride; + int blocklen; } strided; // used for DART_KIND_INDEXED struct { @@ -101,16 +101,32 @@ int dart__mpi__datatype_sizeof(dart_datatype_t dart_type) { return (dts->kind == DART_KIND_BASIC) ? dts->basic.size : -1; } +DART_INLINE +dart_datatype_t dart__mpi__datatype_base(dart_datatype_t dart_type) { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + return (dts->kind == DART_KIND_BASIC) ? dart_type : dts->base_type; +} + DART_INLINE bool dart__mpi__datatype_isbasic(dart_datatype_t dart_type) { return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_BASIC); } +DART_INLINE +bool dart__mpi__datatype_samebase( + dart_datatype_t lhs_type, + dart_datatype_t rhs_type) { + return ( + dart__mpi__datatype_base(lhs_type) == dart__mpi__datatype_base(rhs_type)); +} + DART_INLINE MPI_Datatype dart__mpi__datatype_maxtype(dart_datatype_t dart_type) { dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); return dts->max_type; } +char* dart__mpi__datatype_name(dart_datatype_t dart_type) DART_INTERNAL; + #endif /* DART_ADAPT_COMMUNICATION_PRIV_H_INCLUDED */ diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 97c733754..5babcb273 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -40,6 +40,29 @@ } \ } while (0) +#define CHECK_EQUAL_BASETYPE(_src_type, _dst_type) \ + do { \ + if (dart__unlikely(!dart__mpi__datatype_samebase(_src_type, _dst_type))){ \ + char *src_name = dart__mpi__datatype_name(_src_type); \ + char *dst_name = dart__mpi__datatype_name(dst_type); \ + DART_LOG_ERROR("dart_get ! Cannot convert base-types (%s vs %s)", \ + src_name, dst_name); \ + free(src_name); \ + free(dst_name); \ + return DART_ERR_INVAL; \ + } \ + } while (0) + +#define CHECK_IS_BASICTYPE(_dtype) \ + do { \ + if (dart__unlikely(!dart__mpi__datatype_isbasic(_dtype))) { \ + char *name = dart__mpi__datatype_name(_dtype); \ + DART_LOG_ERROR("dart_get ! Cannot fetch-op non-basic type (%s)", name); \ + free(name); \ + return DART_ERR_INVAL; \ + } \ + } while (0) + /** * Temporary space allocation: * - on the stack for allocations <=64B @@ -124,14 +147,16 @@ dart_ret_t dart_get( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype) + dart_datatype_t src_type, + dart_datatype_t dst_type) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); dart_team_t teamid = gptr.teamid; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_get ! failed: Unknown team %i!", teamid); @@ -151,25 +176,29 @@ dart_ret_t dart_get( return DART_ERR_INVAL; } - if (team_data->unitid == team_unit_id.id) { - // use direct memcpy if we are on the same unit - memcpy(dest, seginfo->selfbaseptr + offset, - nelem * dart__mpi__datatype_sizeof(dtype)); - DART_LOG_DEBUG("dart_get: memcpy nelem:%zu " - "source (coll.): offset:%lu -> dest: %p", - nelem, offset, dest); - return DART_OK; - } + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + if (team_data->unitid == team_unit_id.id) { + // use direct memcpy if we are on the same unit + memcpy(dest, seginfo->selfbaseptr + offset, + nelem * dart__mpi__datatype_sizeof(src_type)); + DART_LOG_DEBUG("dart_get: memcpy nelem:%zu " + "source (coll.): offset:%lu -> dest: %p", + nelem, offset, dest); + return DART_OK; + } #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, dtype); - } + DART_LOG_DEBUG("dart_get: shared windows enabled"); + if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + return get_shared_mem(team_data, seginfo, dest, offset, + team_unit_id, nelem, src_type); + } #else - DART_LOG_DEBUG("dart_get: shared windows disabled"); + DART_LOG_DEBUG("dart_get: shared windows disabled"); #endif // !defined(DART_MPI_DISABLE_SHARED_WINDOWS) + } /* * MPI uses offset type int, do not copy more than INT_MAX elements: @@ -189,11 +218,11 @@ dart_ret_t dart_get( CHECK_MPI_RET( MPI_Get(dest_ptr, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(src_type), team_unit_id.id, offset, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(dst_type), win), "MPI_Get"); offset += nchunks * MAX_CONTIG_ELEMENTS; @@ -205,11 +234,11 @@ dart_ret_t dart_get( CHECK_MPI_RET( MPI_Get(dest_ptr, remainder, - mpi_dtype, + dart__mpi__datatype(src_type), team_unit_id.id, offset, remainder, - mpi_dtype, + dart__mpi__datatype(dst_type), win), "MPI_Get"); } @@ -222,14 +251,16 @@ dart_ret_t dart_put( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype) + dart_datatype_t src_type, + dart_datatype_t dst_type) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); dart_team_t teamid = gptr.teamid; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_put ! failed: Unknown team %i!", teamid); @@ -246,25 +277,28 @@ dart_ret_t dart_put( return DART_ERR_INVAL; } - /* copy data directly if we are on the same unit */ - if (team_unit_id.id == team_data->unitid) { - memcpy(seginfo->selfbaseptr + offset, src, - nelem * dart__mpi__datatype_sizeof(dtype)); - DART_LOG_DEBUG("dart_put: memcpy nelem:%zu (from global allocation)" - "offset: %"PRIu64"", nelem, offset); - return DART_OK; - } + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + /* copy data directly if we are on the same unit */ + if (team_unit_id.id == team_data->unitid) { + memcpy(seginfo->selfbaseptr + offset, src, + nelem * dart__mpi__datatype_sizeof(src_type)); + DART_LOG_DEBUG("dart_put: memcpy nelem:%zu (from global allocation)" + "offset: %"PRIu64"", nelem, offset); + return DART_OK; + } #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_put: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return put_shared_mem(team_data, seginfo, src, offset, - team_unit_id, nelem, dtype); - } + DART_LOG_DEBUG("dart_put: shared windows enabled"); + if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + return put_shared_mem(team_data, seginfo, src, offset, + team_unit_id, nelem, src_type); + } #else - DART_LOG_DEBUG("dart_put: shared windows disabled"); + DART_LOG_DEBUG("dart_put: shared windows disabled"); #endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - + } // source on another node or shared memory windows disabled MPI_Win win = seginfo->win; offset += dart_segment_disp(seginfo, team_unit_id); @@ -280,11 +314,11 @@ dart_ret_t dart_put( CHECK_MPI_RET( MPI_Put(src_ptr, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(src_type), team_unit_id.id, offset, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(dst_type), win), "MPI_Put"); offset += nchunks * MAX_CONTIG_ELEMENTS; @@ -297,11 +331,11 @@ dart_ret_t dart_put( CHECK_MPI_RET( MPI_Put(src_ptr, remainder, - mpi_dtype, + dart__mpi__datatype(src_type), team_unit_id.id, offset, remainder, - mpi_dtype, + dart__mpi__datatype(dst_type), win), "MPI_Put"); } @@ -409,6 +443,8 @@ dart_ret_t dart_fetch_and_op( mpi_dtype = dart__mpi__datatype(dtype); mpi_op = dart__mpi__op(op); + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_fetch_and_op ! failed: Unknown team %i!", teamid); @@ -511,10 +547,10 @@ dart_ret_t dart_get_handle( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handleptr) { - MPI_Datatype mpi_type = dart__mpi__datatype(dtype); dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; @@ -522,6 +558,8 @@ dart_ret_t dart_get_handle( *handleptr = DART_HANDLE_NULL; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_get_handle ! failed: Unknown team %i!", teamid); @@ -542,18 +580,22 @@ dart_ret_t dart_get_handle( team_unit_id.id, offset, seg_id, gptr.teamid, nelem); DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(*handleptr)); + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_handle: shared windows enabled"); + DART_LOG_DEBUG("dart_get_handle: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_ret_t ret = get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, dtype); - // return NULL request - return ret; - } + if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + dart_ret_t ret = get_shared_mem(team_data, seginfo, dest, offset, + team_unit_id, nelem, src_type); + // return NULL request + return ret; + } #else - DART_LOG_DEBUG("dart_get_handle: shared windows disabled"); + DART_LOG_DEBUG("dart_get_handle: shared windows disabled"); #endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ + } /* * MPI shared windows disabled or target and calling unit are on different @@ -578,11 +620,11 @@ dart_ret_t dart_get_handle( dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Rget(dest_ptr, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(src_type), team_unit_id.id, offset, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(dst_type), win, &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { free(handle); @@ -594,16 +636,15 @@ dart_ret_t dart_get_handle( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); DART_LOG_TRACE( "dart_get_handle: MPI_Rget (dest %p, size %zu)", dest_ptr, remainder); if (MPI_Rget(dest_ptr, remainder, - mpi_dtype, + dart__mpi__datatype(src_type), team_unit_id.id, offset, remainder, - mpi_dtype, + dart__mpi__datatype(dst_type), win, &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { free(handle); @@ -624,7 +665,8 @@ dart_ret_t dart_put_handle( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handleptr) { dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); @@ -634,6 +676,8 @@ dart_ret_t dart_put_handle( *handleptr = DART_HANDLE_NULL; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_put ! failed: Unknown team %i!", teamid); @@ -667,11 +711,11 @@ dart_ret_t dart_put_handle( src_ptr, nchunks * MAX_CONTIG_ELEMENTS); if (MPI_Rput(src_ptr, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(src_type), team_unit_id.id, offset, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(dst_type), win, &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { free(handle); @@ -683,16 +727,15 @@ dart_ret_t dart_put_handle( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); DART_LOG_TRACE( "dart_put_handle: MPI_Rput (src %p, size %zu)", src_ptr, remainder); if (MPI_Rput(src_ptr, remainder, - mpi_dtype, + dart__mpi__datatype(src_type), team_unit_id.id, offset, remainder, - mpi_dtype, + dart__mpi__datatype(dst_type), win, &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { free(handle); @@ -712,16 +755,19 @@ dart_ret_t dart_put_handle( * \todo Check if MPI_Get_accumulate (MPI_NO_OP) yields better performance */ dart_ret_t dart_put_blocking( - dart_gptr_t gptr, - const void * src, - size_t nelem, - dart_datatype_t dtype) + dart_gptr_t gptr, + const void * src, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type) { dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_put_blocking ! failed: Unknown team %i!", gptr.teamid); @@ -742,24 +788,28 @@ dart_ret_t dart_put_blocking( team_unit_id.id, offset, seg_id, gptr.teamid, nelem); - /* copy data directly if we are on the same unit */ - if (team_unit_id.id == team_data->unitid) { - memcpy(seginfo->selfbaseptr + offset, src, - nelem * dart__mpi__datatype_sizeof(dtype)); - DART_LOG_DEBUG("dart_put_blocking: memcpy nelem:%zu (from global allocation)" - "offset: %"PRIu64"", nelem, offset); - return DART_OK; - } + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + /* copy data directly if we are on the same unit */ + if (team_unit_id.id == team_data->unitid) { + memcpy(seginfo->selfbaseptr + offset, src, + nelem * dart__mpi__datatype_sizeof(src_type)); + DART_LOG_DEBUG("dart_put_blocking: memcpy nelem:%zu (from global allocation)" + "offset: %"PRIu64"", nelem, offset); + return DART_OK; + } #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_put_blocking: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return put_shared_mem(team_data, seginfo, src, offset, - team_unit_id, nelem, dtype); - } + DART_LOG_DEBUG("dart_put_blocking: shared windows enabled"); + if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + return put_shared_mem(team_data, seginfo, src, offset, + team_unit_id, nelem, src_type); + } #else - DART_LOG_DEBUG("dart_put_blocking: shared windows disabled"); + DART_LOG_DEBUG("dart_put_blocking: shared windows disabled"); #endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ + } MPI_Win win = seginfo->win; offset += dart_segment_disp(seginfo, team_unit_id); @@ -779,11 +829,11 @@ dart_ret_t dart_put_blocking( CHECK_MPI_RET( MPI_Put(src_ptr, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(src_type), team_unit_id.id, offset, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(dst_type), win), "MPI_Put"); src_ptr += nchunks * MAX_CONTIG_ELEMENTS; @@ -791,17 +841,16 @@ dart_ret_t dart_put_blocking( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); DART_LOG_TRACE( "dart_put_blocking: MPI_Put (src %p, size %zu)", src_ptr, remainder); CHECK_MPI_RET( MPI_Put(src_ptr, remainder, - mpi_dtype, + dart__mpi__datatype(src_type), team_unit_id.id, offset, remainder, - mpi_dtype, + dart__mpi__datatype(dst_type), win), "MPI_Put"); } @@ -817,16 +866,19 @@ dart_ret_t dart_put_blocking( * \todo Check if MPI_Accumulate (REPLACE) yields better performance */ dart_ret_t dart_get_blocking( - void * dest, - dart_gptr_t gptr, - size_t nelem, - dart_datatype_t dtype) + void * dest, + dart_gptr_t gptr, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type) { dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_get_blocking ! failed: Unknown team %i!", teamid); @@ -848,25 +900,29 @@ dart_ret_t dart_get_blocking( return DART_ERR_INVAL; } - if (team_data->unitid == team_unit_id.id) { - // use direct memcpy if we are on the same unit - memcpy(dest, seginfo->selfbaseptr + offset, - nelem * dart__mpi__datatype_sizeof(dtype)); - DART_LOG_DEBUG("dart_get_blocking: memcpy nelem:%zu " - "source (coll.): offset:%lu -> dest: %p", - nelem, offset, dest); - return DART_OK; - } + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + if (team_data->unitid == team_unit_id.id) { + // use direct memcpy if we are on the same unit + memcpy(dest, seginfo->selfbaseptr + offset, + nelem * dart__mpi__datatype_sizeof(src_type)); + DART_LOG_DEBUG("dart_get_blocking: memcpy nelem:%zu " + "source (coll.): offset:%lu -> dest: %p", + nelem, offset, dest); + return DART_OK; + } #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, dtype); - } + DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); + if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + return get_shared_mem(team_data, seginfo, dest, offset, + team_unit_id, nelem, src_type); + } #else - DART_LOG_DEBUG("dart_get_blocking: shared windows disabled"); + DART_LOG_DEBUG("dart_get_blocking: shared windows disabled"); #endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ + } /* * MPI shared windows disabled or target and calling unit are on different @@ -892,11 +948,11 @@ dart_ret_t dart_get_blocking( CHECK_MPI_RET( MPI_Rget(dest_ptr, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(src_type), team_unit_id.id, offset, nchunks, - dart__mpi__datatype_maxtype(dtype), + dart__mpi__datatype_maxtype(dst_type), win, &reqs[nreqs++]), "MPI_Rget"); @@ -905,18 +961,17 @@ dart_ret_t dart_get_blocking( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); DART_LOG_TRACE( "dart_get_blocking: MPI_Rget (dest %p, size %zu)", dest_ptr, remainder); CHECK_MPI_RET( MPI_Rget(dest_ptr, remainder, - mpi_dtype, + dart__mpi__datatype(src_type), team_unit_id.id, offset, remainder, - mpi_dtype, + dart__mpi__datatype(dst_type), win, &reqs[nreqs++]), "MPI_Rget"); @@ -1515,6 +1570,8 @@ dart_ret_t dart_scatter( dart_team_unit_t root, dart_team_t teamid) { + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_scatter ! failed: unknown team %d", teamid); @@ -1577,6 +1634,8 @@ dart_ret_t dart_gather( DART_LOG_TRACE("dart_gather() team:%d nelem:%"PRIu64"", teamid, nelem); + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_gather ! failed: unknown teamid %d", teamid); @@ -1638,6 +1697,8 @@ dart_ret_t dart_allgather( DART_LOG_TRACE("dart_allgather() team:%d nelem:%"PRIu64"", teamid, nelem); + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_allgather ! unknown teamid %d", teamid); @@ -1703,6 +1764,8 @@ dart_ret_t dart_allgatherv( DART_LOG_TRACE("dart_allgatherv() team:%d nsendelem:%"PRIu64"", teamid, nsendelem); + CHECK_IS_BASICTYPE(dtype); + /* * MPI uses offset type int, do not copy more than INT_MAX elements: */ @@ -1773,6 +1836,9 @@ dart_ret_t dart_allreduce( dart_operation_t op, dart_team_t team) { + + CHECK_IS_BASICTYPE(dtype); + MPI_Op mpi_op = dart__mpi__op(op); MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); @@ -1815,6 +1881,8 @@ dart_ret_t dart_reduce( MPI_Op mpi_op = dart__mpi__op(op); MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + CHECK_IS_BASICTYPE(dtype); + /* * MPI uses offset type int, do not copy more than INT_MAX elements: */ @@ -1847,10 +1915,10 @@ dart_ret_t dart_reduce( dart_ret_t dart_send( const void * sendbuf, - size_t nelem, - dart_datatype_t dtype, - int tag, - dart_global_unit_t unit) + size_t nelem, + dart_datatype_t dtype, + int tag, + dart_global_unit_t unit) { MPI_Comm comm; MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); @@ -1982,5 +2050,3 @@ dart_ret_t dart_sendrecv( "MPI_Sendrecv"); return DART_OK; } - - diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c index abc65b84a..a162a1084 100644 --- a/dart-impl/mpi/src/dart_mpi_types.c +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -14,6 +14,23 @@ #include #include #include +#include + +#define DART_TYPE_NAMELEN 256 + +static const char* __dart_base_type_names[DART_TYPE_LAST+1] = { + "UNDEFINED", + "BYTE", + "SHORT", + "INT", + "UNSIGNED INT", + "LONG", + "UNSIGNED LONG", + "LONG LONG", + "FLOAT", + "DOUBLE", + "INVALID" +}; dart_datatype_struct_t __dart_base_types[DART_TYPE_LAST]; @@ -86,9 +103,36 @@ dart__mpi__datatype_init() return DART_OK; } +char* dart__mpi__datatype_name(dart_datatype_t dart_type) +{ + char *buf = NULL; + if (dart_type <= DART_TYPE_LAST) { + buf = strdup(__dart_base_type_names[dart_type]); + } else { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + if (dts->kind == DART_KIND_INDEXED) { + buf = malloc(DART_TYPE_NAMELEN); + char *base_name = dart__mpi__datatype_name(dts->base_type); + snprintf(buf, DART_TYPE_NAMELEN, "INDEXED(%i:%s)", + dts->indexed.count, base_name); + free(base_name); + } else if (dts->kind == DART_KIND_STRIDED){ + buf = malloc(DART_TYPE_NAMELEN); + char *base_name = dart__mpi__datatype_name(dts->base_type); + snprintf(buf, DART_TYPE_NAMELEN, "STRIDED(%i:%i:%s)", + dts->strided.blocklen, dts->strided.stride, base_name); + free(base_name); + } else { + DART_LOG_ERROR("INVALID data type detected!"); + } + } + return buf; +} + dart_ret_t dart_type_create_strided( dart_datatype_t basetype_id, + size_t num_blocks, size_t stride, size_t blocklen, dart_datatype_t * newtype) @@ -108,9 +152,14 @@ dart_type_create_strided( return DART_ERR_INVAL; } + if (num_blocks > INT_MAX || stride > INT_MAX || blocklen > INT_MAX) { + DART_LOG_ERROR("dart_type_create_strided: arguments out of range (>INT_MAX)"); + return DART_ERR_INVAL; + } + MPI_Datatype mpi_dtype = basetype->mpi_type; MPI_Datatype new_mpi_dtype; - MPI_Type_vector(1, blocklen, stride, mpi_dtype, &new_mpi_dtype); + MPI_Type_vector(num_blocks, blocklen, stride, mpi_dtype, &new_mpi_dtype); MPI_Type_commit(&new_mpi_dtype); dart_datatype_struct_t *new_struct; new_struct = malloc(sizeof(struct dart_datatype_struct)); diff --git a/dash/test/dart/DARTMemAllocTest.cc b/dash/test/dart/DARTMemAllocTest.cc index 85d4b2e73..66fcc3992 100644 --- a/dash/test/dart/DARTMemAllocTest.cc +++ b/dash/test/dart/DARTMemAllocTest.cc @@ -83,6 +83,7 @@ TEST_F(DARTMemAllocTest, LocalAlloc) &neighbor_val, arr[neighbor_id], ds.nelem, + ds.dtype, ds.dtype)); ASSERT_EQ_U( diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index 795e18629..455bc0b28 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -29,6 +29,7 @@ TEST_F(DARTOnesidedTest, GetBlockingSingleBlock) local_array, // lptr dest (array.begin() + g_src_index).dart_gptr(), // gptr start ds.nelem, + ds.dtype, ds.dtype ); for (size_t l = 0; l < block_size; ++l) { @@ -70,6 +71,7 @@ TEST_F(DARTOnesidedTest, GetBlockingSingleBlockTeam) local_array, // lptr dest (array.begin() + g_src_index).dart_gptr(), // gptr start ds.nelem, + ds.dtype, ds.dtype ); for (size_t l = 0; l < block_size; ++l) { @@ -102,7 +104,8 @@ TEST_F(DARTOnesidedTest, GetBlockingTwoBlocks) local_array, // lptr dest array.begin().dart_gptr(), // gptr start ds.nelem, // number of elements - ds.dtype // data type + ds.dtype, // src data type + ds.dtype // dst data type ); // Fails for elements in second block, i.e. for l < num_elem_copy: for (size_t l = 0; l < block_size; ++l) { @@ -148,6 +151,7 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) (array.begin() + (u * block_size)).dart_gptr(), ds.nelem, ds.dtype, + ds.dtype, &handle) ); LOG_MESSAGE("dart_get_handle returned handle %p", diff --git a/dash/test/dart/ThreadsafetyTest.cc b/dash/test/dart/ThreadsafetyTest.cc index 2fae06cd5..8f5adcbb9 100644 --- a/dash/test/dart/ThreadsafetyTest.cc +++ b/dash/test/dart/ThreadsafetyTest.cc @@ -203,7 +203,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { gptr_r.unitid = (team->myid() + 1) % team->size(); dash::dart_storage ds(elem_per_thread); ASSERT_EQ_U( - dart_get_blocking(check, gptr_r, ds.nelem, ds.dtype), + dart_get_blocking(check, gptr_r, ds.nelem, ds.dtype, ds.dtype), DART_OK); team->barrier(); diff --git a/dash/test/memory/GlobHeapMemTest.cc b/dash/test/memory/GlobHeapMemTest.cc index df77d4561..81ba0a8af 100644 --- a/dash/test/memory/GlobHeapMemTest.cc +++ b/dash/test/memory/GlobHeapMemTest.cc @@ -224,7 +224,7 @@ TEST_F(GlobHeapMemTest, UnbalancedRealloc) // request value via DART global pointer: value_t dart_gptr_value; dash::dart_storage ds(1); - dart_get_blocking(&dart_gptr_value, gptr, ds.nelem, ds.dtype); + dart_get_blocking(&dart_gptr_value, gptr, ds.nelem, ds.dtype, ds.dtype); DASH_LOG_TRACE_VAR("GlobHeapMemTest.UnbalancedRealloc", dart_gptr_value); From cca5ed922eee1786c50b69a4bc6b4d07572bedde Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 14:17:22 +0900 Subject: [PATCH 06/31] Add dash::internal::put/get to wrap dart_put/get --- dash/include/dash/Onesided.h | 104 ++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 32 deletions(-) diff --git a/dash/include/dash/Onesided.h b/dash/include/dash/Onesided.h index f0e5e5003..ff287488f 100644 --- a/dash/include/dash/Onesided.h +++ b/dash/include/dash/Onesided.h @@ -8,8 +8,68 @@ namespace dash { +namespace internal { + + template + inline + void + put(const T *src, const dart_gptr_t& gptr, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_put(gptr, + src, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + template + inline + void + get(const dart_gptr_t& gptr, T *dst, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_get(gptr, + dst, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + template + inline + void + put_blocking(const T *src, const dart_gptr_t& gptr, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_put_blocking(gptr, + src, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + template + inline + void + get_blocking(const dart_gptr_t& gptr, T *dst, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_get_blocking(gptr, + dst, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + +} // namespace internal + /** - * Block until completion of local and global operations on a global + * Block until local and global completion of operations on a global * address. */ template @@ -17,19 +77,19 @@ void fence( const GlobPtrType & gptr) { DASH_ASSERT_RETURNS( - dart_fence(gptr.dart_gptr()), + dart_flush(gptr.dart_gptr()), DART_OK); } /** - * Block until completion of local operations on a global address. + * Block until local completion of operations on a global address. */ template void fence_local( const GlobPtrType & gptr) { DASH_ASSERT_RETURNS( - dart_fence_local(gptr.dart_gptr()), + dart_flush_local(gptr.dart_gptr()), DART_OK); } @@ -40,19 +100,14 @@ void fence_local( * \nonblocking */ template +constexpr void put_value_async( /// [IN] Value to set const T & newval, /// [IN] Global pointer referencing target address of value const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put(gptr.dart_gptr(), - (void *)(&newval), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::put(&newval, gptr.dart_gptr(), 1); } /** @@ -62,6 +117,7 @@ void put_value_async( * \nonblocking */ template +constexpr void get_value_async( /// [OUT] Local pointer that will contain the value of the /// global address @@ -69,13 +125,7 @@ void get_value_async( /// [IN] Global pointer to read const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get(ptr, - gptr.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get(gptr.dart_gptr(), ptr, 1); } /** @@ -84,19 +134,14 @@ void get_value_async( * \blocking */ template +constexpr void put_value( /// [IN] Value to set const T & newval, /// [IN] Global pointer referencing target address of value const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking(gptr.dart_gptr(), - (void *)(&newval), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::put_blocking(&newval, gptr.dart_gptr(), 1); } /** @@ -105,6 +150,7 @@ void put_value( * \blocking */ template +constexpr void get_value( /// [OUT] Local pointer that will contain the value of the /// global address @@ -112,13 +158,7 @@ void get_value( /// [IN] Global pointer to read const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(ptr, - gptr.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get_blocking(gptr.dart_gptr(), ptr, 1); } } // namespace dash From 245858ac74b29e77b8b6d86bee0fdad58c1545ed Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 15:17:27 +0900 Subject: [PATCH 07/31] Adapt DASH to DART API changes --- dash/include/dash/GlobAsyncRef.h | 25 +---- dash/include/dash/GlobRef.h | 51 ++------- dash/include/dash/GlobSharedRef.h | 15 +-- dash/include/dash/Onesided.h | 92 +++++++++++++-- dash/include/dash/algorithm/Copy.h | 118 ++++++-------------- dash/include/dash/experimental/HaloMatrix.h | 6 +- dash/include/dash/memory/GlobHeapMem.h | 17 ++- dash/test/dart/DARTOnesidedTest.cc | 8 +- 8 files changed, 152 insertions(+), 180 deletions(-) diff --git a/dash/include/dash/GlobAsyncRef.h b/dash/include/dash/GlobAsyncRef.h index e56325750..3e56ae768 100644 --- a/dash/include/dash/GlobAsyncRef.h +++ b/dash/include/dash/GlobAsyncRef.h @@ -187,11 +187,7 @@ class GlobAsyncRef nonconst_value_type get() const { nonconst_value_type value; DASH_LOG_TRACE_VAR("GlobAsyncRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&value), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &value, 1); return value; } @@ -202,11 +198,7 @@ class GlobAsyncRef * at which point the referenced value can be used. */ void get(nonconst_value_type *tptr) const { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get(static_cast(tptr), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get(_gptr, tptr, 1); } /** @@ -228,11 +220,7 @@ class GlobAsyncRef void set(const_value_type* tptr) { DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", *tptr); DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put(_gptr, static_cast(tptr), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put(_gptr, tptr, 1); } /** @@ -244,7 +232,6 @@ class GlobAsyncRef void set(const_value_type& new_value) { DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", new_value); DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", _gptr); - dash::dart_storage ds(1); _value = new_value; // check that we do not overwrite the handle if it has been used before if (this->_handle != DART_HANDLE_NULL) { @@ -253,11 +240,7 @@ class GlobAsyncRef DART_OK ); } - DASH_ASSERT_RETURNS( - dart_put_handle(_gptr, static_cast(&_value), - ds.nelem, ds.dtype, &_handle), - DART_OK - ); + dash::internal::put_handle(_gptr, &_value, 1, &_handle); } /** diff --git a/dash/include/dash/GlobRef.h b/dash/include/dash/GlobRef.h index 8a5f0283b..b87ba7e4a 100644 --- a/dash/include/dash/GlobRef.h +++ b/dash/include/dash/GlobRef.h @@ -114,7 +114,7 @@ class GlobRef explicit constexpr GlobRef(dart_gptr_t dart_gptr) : _gptr(dart_gptr) { } - + /** * Constructor to convert \c GlobAsyncRef to GlobRef. Set to explicit to * avoid unintendet conversion @@ -131,7 +131,7 @@ class GlobRef * of \c operator=(const self_t &). */ GlobRef(const self_t & other) = delete; - + /** * Unlike native reference types, global reference types are moveable. */ @@ -168,11 +168,7 @@ class GlobRef DASH_LOG_TRACE("GlobRef.T()", "conversion operator"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); nonconst_value_type t; - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &t, 1); DASH_LOG_TRACE_VAR("GlobRef.T >", _gptr); return t; } @@ -204,12 +200,7 @@ class GlobRef DASH_LOG_TRACE_VAR("GlobRef.set", _gptr); // TODO: Clarify if dart-call can be avoided if // _gptr->is_local() - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking( - _gptr, static_cast(&val), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put_blocking(_gptr, &val, 1); DASH_LOG_TRACE_VAR("GlobRef.set >", _gptr); } @@ -217,54 +208,32 @@ class GlobRef DASH_LOG_TRACE("T GlobRef.get()", "explicit get"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); nonconst_value_type t; - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &t, 1); return t; } void get(nonconst_value_type *tptr) const { DASH_LOG_TRACE("GlobRef.get(T*)", "explicit get into provided ptr"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(tptr), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, tptr, 1); } void get(nonconst_value_type& tref) const { DASH_LOG_TRACE("GlobRef.get(T&)", "explicit get into provided ref"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&tref), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &tref, 1); } void put(const_value_type& tref) { DASH_LOG_TRACE("GlobRef.put(T&)", "explicit put of provided ref"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking( - _gptr, static_cast(&tref), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put_blocking(_gptr, &tref, 1); } void put(const_value_type* tptr) { DASH_LOG_TRACE("GlobRef.put(T*)", "explicit put of provided ptr"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking( - _gptr, static_cast(tptr), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put_blocking(_gptr, tptr, 1); } self_t & operator+=(const nonconst_value_type& ref) { @@ -396,7 +365,7 @@ class GlobRef } /** - * specialization which swappes the values of two global references + * specialization which swappes the values of two global references */ inline void swap(dash::GlobRef & b){ T tmp = static_cast(*this); diff --git a/dash/include/dash/GlobSharedRef.h b/dash/include/dash/GlobSharedRef.h index 5154717ca..d33e2a99a 100644 --- a/dash/include/dash/GlobSharedRef.h +++ b/dash/include/dash/GlobSharedRef.h @@ -176,8 +176,7 @@ class GlobSharedRef } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.T()", _gptr); T t; - dash::dart_storage ds(1); - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype); + dash::internal::get_blocking(_gptr, &t, 1); return t; } DASH_THROW( @@ -201,8 +200,7 @@ class GlobSharedRef t = *_lptr; } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.T()", _gptr); - dash::dart_storage ds(1); - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype); + dash::internal::get_blocking(_gptr, &t, 1); } return t; } @@ -215,7 +213,7 @@ class GlobSharedRef } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.T()", _gptr); dash::dart_storage ds(1); - dart_put_blocking(_gptr, static_cast(&val), ds.nelem, ds.dtype); + dash::internal::put_blocking(_gptr, &val, 1); } DASH_LOG_TRACE("GlobSharedRef.put >"); } @@ -233,12 +231,7 @@ class GlobSharedRef *_lptr = val; } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.=", _gptr); - dash::dart_storage ds(1); - dart_put_blocking( - _gptr, - static_cast(&val), - ds.nelem, - ds.dtype); + dash::internal::put_blocking(_gptr, &val, 1); } DASH_LOG_TRACE("GlobSharedRef.= >"); return *this; diff --git a/dash/include/dash/Onesided.h b/dash/include/dash/Onesided.h index ff287488f..85b6c8c76 100644 --- a/dash/include/dash/Onesided.h +++ b/dash/include/dash/Onesided.h @@ -10,10 +10,16 @@ namespace dash { namespace internal { + /** + * Non-blocking write of \c nelem values from \c src to the global memory + * location referenced by \c gptr. + * + * \sa dart_put + */ template inline void - put(const T *src, const dart_gptr_t& gptr, size_t nelem) { + put(const dart_gptr_t& gptr, const T *src, size_t nelem) { dash::dart_storage ds(nelem); DASH_ASSERT_RETURNS( dart_put(gptr, @@ -24,24 +30,36 @@ namespace internal { DART_OK); } + /** + * Non-blocking read of \c nelem values the global memory + * location referenced by \c gptr into memory referenced by \c src. + * + * \sa dart_get + */ template inline void get(const dart_gptr_t& gptr, T *dst, size_t nelem) { dash::dart_storage ds(nelem); DASH_ASSERT_RETURNS( - dart_get(gptr, - dst, + dart_get(dst, + gptr, ds.nelem, ds.dtype, ds.dtype), DART_OK); } + /** + * Blocking write of \c nelem values from \c src to the global memory + * location referenced by \c gptr. + * + * \sa dart_put_blocking + */ template inline void - put_blocking(const T *src, const dart_gptr_t& gptr, size_t nelem) { + put_blocking(const dart_gptr_t& gptr, const T *src, size_t nelem) { dash::dart_storage ds(nelem); DASH_ASSERT_RETURNS( dart_put_blocking(gptr, @@ -52,20 +70,78 @@ namespace internal { DART_OK); } + /** + * Blocking read of \c nelem values the global memory + * location referenced by \c gptr into memory referenced by \c src. + * + * \sa dart_get_blocking + */ template inline void get_blocking(const dart_gptr_t& gptr, T *dst, size_t nelem) { dash::dart_storage ds(nelem); DASH_ASSERT_RETURNS( - dart_get_blocking(gptr, - dst, + dart_get_blocking(dst, + gptr, ds.nelem, ds.dtype, ds.dtype), DART_OK); } + /** + * Write of \c nelem values from \c src to the global memory + * location referenced by \c gptr. Creates a handle that can be used to + * wait for completion. + * + * \sa dart_put_handle + */ + template + inline + void + put_handle( + const dart_gptr_t & gptr, + const T * src, + size_t nelem, + dart_handle_t * handle) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_put_handle(gptr, + src, + ds.nelem, + ds.dtype, + ds.dtype, + handle), + DART_OK); + } + + /** + * Non-blocking read of \c nelem values the global memory + * location referenced by \c gptr into memory referenced by \c src. + * Creates a handle that can be used to wait for completion. + * + * \sa dart_get_handle + */ + template + inline + void + get_handle( + const dart_gptr_t & gptr, + T * dst, + size_t nelem, + dart_handle_t * handle) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_get_handle(dst, + gptr, + ds.nelem, + ds.dtype, + ds.dtype, + handle), + DART_OK); + } + } // namespace internal /** @@ -107,7 +183,7 @@ void put_value_async( /// [IN] Global pointer referencing target address of value const GlobPtrType & gptr) { - dash::internal::put(&newval, gptr.dart_gptr(), 1); + dash::internal::put(gptr.dart_gptr(), &newval, 1); } /** @@ -141,7 +217,7 @@ void put_value( /// [IN] Global pointer referencing target address of value const GlobPtrType & gptr) { - dash::internal::put_blocking(&newval, gptr.dart_gptr(), 1); + dash::internal::put_blocking(gptr.dart_gptr(), &newval, 1); } /** diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index f89716e22..2baff0bd2 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -174,14 +174,10 @@ ValueType * copy_impl( "left:", total_elem_left); auto cur_in_first = g_in_first + num_elem_copied; auto cur_out_first = out_first + num_elem_copied; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get_blocking( - cur_out_first, - cur_in_first.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get_blocking( + cur_in_first.dart_gptr(), + cur_out_first, + num_copy_elem); num_elem_copied += num_copy_elem; } } else { @@ -228,17 +224,7 @@ ValueType * copy_impl( "left:", total_elem_left); auto dest_ptr = out_first + num_elem_copied; auto src_gptr = cur_in_first.dart_gptr(); - dash::dart_storage ds(num_copy_elem); - if (dart_get_blocking( - dest_ptr, - src_gptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_impl", "dart_get failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_get failed"); - } + dash::internal::get_blocking(src_gptr, dest_ptr, num_copy_elem); num_elem_copied += num_copy_elem; } } @@ -327,27 +313,19 @@ dash::Future copy_async_impl( auto cur_in_first = g_in_first + num_elem_copied; auto cur_out_first = out_first + num_elem_copied; #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get( - cur_out_first, - cur_in_first.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get( + cur_in_first.dart_gptr(), + cur_out_first, + num_copy_elem); req_handles.push_back(in_first.dart_gptr()); #else dart_handle_t get_handle; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get_handle( - cur_out_first, - cur_in_first.dart_gptr(), - ds.nelem, - ds.dtype, - &get_handle), - DART_OK); - if (get_handle != NULL) { + dash::internal::get_handle( + cur_in_first.dart_gptr(), + cur_out_first, + num_copy_elem, + &get_handle); + if (get_handle != DART_HANDLE_NULL) { req_handles.push_back(get_handle); } #endif @@ -398,30 +376,19 @@ dash::Future copy_async_impl( auto src_gptr = cur_in_first.dart_gptr(); auto dest_ptr = out_first + num_elem_copied; #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dash::dart_storage ds(num_copy_elem); - if (dart_get( - dest_ptr, + dash::internal::get( src_gptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_async_impl", "dart_get failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_get failed"); - } + dest_ptr, + num_copy_elem); req_handles.push_back(src_gptr); #else dart_handle_t get_handle; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get_handle( - dest_ptr, - src_gptr, - ds.nelem, - ds.dtype, - &get_handle), - DART_OK); - if (get_handle != NULL) { + dash::internal::get_handle( + src_gptr, + dest_ptr, + num_copy_elem, + &get_handle); + if (get_handle != DART_HANDLE_NULL) { req_handles.push_back(get_handle); } #endif @@ -494,14 +461,10 @@ GlobOutputIt copy_impl( "g_out_first:", out_first.pos()); auto num_elements = std::distance(in_first, in_last); - dash::dart_storage ds(num_elements); - DASH_ASSERT_RETURNS( - dart_put_blocking( - out_first.dart_gptr(), - in_first, - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::put_blocking( + out_first.dart_gptr(), + in_first, + num_elements); auto out_last = out_first + num_elements; DASH_LOG_TRACE("dash::copy_impl >", @@ -538,30 +501,19 @@ dash::Future copy_async_impl( auto src_ptr = in_first; auto dest_gptr = out_first.dart_gptr(); #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dash::dart_storage ds(num_copy_elem); - if (dart_put( + dash::internal::put( dest_gptr, src_ptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_async_impl", "dart_put failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_put failed"); - } + num_copy_elem); req_handles.push_back(dest_gptr); #else dart_handle_t put_handle; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_put_handle( + dash::internal::put_handle( dest_gptr, src_ptr, - ds.nelem, - ds.dtype, - &put_handle), - DART_OK); - if (put_handle != NULL) { + num_copy_elem + &put_handle); + if (put_handle != DART_HANDLE_NULL) { req_handles.push_back(put_handle); } #endif @@ -867,7 +819,7 @@ ValueType * copy( auto total_copy_elem = in_last - in_first; // Instead of testing in_first.local() and in_last.local(), this test for - // a local-only range only requires one call to in_first.local() which + // a local-only range only requires one call to in_first.local() which // increases throughput by ~10% for local ranges. if (num_local_elem == total_copy_elem) { // Entire input range is local: diff --git a/dash/include/dash/experimental/HaloMatrix.h b/dash/include/dash/experimental/HaloMatrix.h index 224a19941..9103b18ac 100644 --- a/dash/include/dash/experimental/HaloMatrix.h +++ b/dash/include/dash/experimental/HaloMatrix.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -230,8 +231,9 @@ class HaloMatrix auto off = _halomemory.haloPos(dim, region); auto it = data.blockview.begin(); for(auto i = 0; i < data.num_handles; ++i, it += data.cont_elems){ - dash::dart_storage ds(data.cont_elems); - dart_get_handle (off + ds.nelem * i, it.dart_gptr(), ds.nelem, ds.dtype, &(data.handle[i])); + dash::internal::get_handle( + it.dart_gptr(), off + data.cont_elems * i, + data.cont_elems, &(data.handle[i])); } if(!async) dart_waitall(data.handle, data.num_handles); diff --git a/dash/include/dash/memory/GlobHeapMem.h b/dash/include/dash/memory/GlobHeapMem.h index 0d74fe929..5f61fb2ad 100644 --- a/dash/include/dash/memory/GlobHeapMem.h +++ b/dash/include/dash/memory/GlobHeapMem.h @@ -1176,16 +1176,13 @@ class GlobHeapMem u_num_attach_buckets, 0); dart_gptr_t u_attach_buckets_sizes_gptr = attach_buckets_sizes_gptr; dart_gptr_setunit(&u_attach_buckets_sizes_gptr, u); - dash::dart_storage ds(u_num_attach_buckets); - DASH_ASSERT_RETURNS( - dart_get_blocking( - // local dest: - u_attach_buckets_sizes.data(), - // global source: - u_attach_buckets_sizes_gptr, - // request bytes (~= number of sizes) from unit u: - ds.nelem, ds.dtype), - DART_OK); + dash::internal::get_blocking( + // global source: + u_attach_buckets_sizes_gptr, + // local dest: + u_attach_buckets_sizes.data(), + // request bytes (~= number of sizes) from unit u: + u_num_attach_buckets); // Update local snapshot of cumulative bucket sizes at unit u: for (int bi = 0; bi < u_num_attach_buckets; ++bi) { size_type single_bkt_size = u_attach_buckets_sizes[bi]; diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index 455bc0b28..aaec42122 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -24,7 +24,7 @@ TEST_F(DARTOnesidedTest, GetBlockingSingleBlock) int g_src_index = unit_src * block_size; // Copy values: dash::dart_storage ds(block_size); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); dart_get_blocking( local_array, // lptr dest (array.begin() + g_src_index).dart_gptr(), // gptr start @@ -66,7 +66,7 @@ TEST_F(DARTOnesidedTest, GetBlockingSingleBlockTeam) int g_src_index = unit_src * block_size; // Copy values: dash::dart_storage ds(block_size); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); dart_get_blocking( local_array, // lptr dest (array.begin() + g_src_index).dart_gptr(), // gptr start @@ -99,7 +99,7 @@ TEST_F(DARTOnesidedTest, GetBlockingTwoBlocks) array.barrier(); // Copy values from first two blocks: dash::dart_storage ds(num_elem_copy); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); dart_get_blocking( local_array, // lptr dest array.begin().dart_gptr(), // gptr start @@ -143,7 +143,7 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) dart_handle_t handle; dash::dart_storage ds(block_size); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); EXPECT_EQ_U( DART_OK, dart_get_handle( From 3fb5adbceab7e925747c96c336e11c3fb47b13ba Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 15:17:55 +0900 Subject: [PATCH 08/31] Catch const and volatile types in dash::dart_datatype --- dash/include/dash/Types.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dash/include/dash/Types.h b/dash/include/dash/Types.h index ec40f787c..889c6e1bb 100644 --- a/dash/include/dash/Types.h +++ b/dash/include/dash/Types.h @@ -159,6 +159,12 @@ struct dart_datatype { static constexpr const dart_datatype_t value = DART_TYPE_DOUBLE; }; +template +struct dart_datatype : dart_datatype { }; + +template +struct dart_datatype : dart_datatype { }; + namespace internal { From 99f4ada29d94d0fe397916681f24b3b9c42a0f42 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 16 Nov 2017 15:18:17 +0900 Subject: [PATCH 09/31] Don't query the size of MPI_TYPE_NULL --- dart-impl/mpi/src/dart_mpi_types.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c index a162a1084..1492de1ce 100644 --- a/dart-impl/mpi/src/dart_mpi_types.c +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -74,16 +74,18 @@ init_basic_datatype( dart_type->base_type = DART_TYPE_UNDEFINED; dart_type->mpi_type = mpi_type; dart_type->kind = DART_KIND_BASIC; - int ret = MPI_Type_size(mpi_type, &size); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to query size of MPI data type!"); - dart_abort(-1); - } - dart_type->basic.size = size; - - // create the type used for large transfers - dart_type->max_type = create_max_datatype(mpi_type); + dart_type->basic.size = 0; + if (mpi_type != MPI_DATATYPE_NULL) { + int ret = MPI_Type_size(mpi_type, &size); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to query size of MPI data type!"); + dart_abort(-1); + } + dart_type->basic.size = size; + // create the type used for large transfers + dart_type->max_type = create_max_datatype(mpi_type); + } } dart_ret_t From 776a577a2fac07128b929e664c86d500aaa1289e Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 17 Nov 2017 19:25:43 +0900 Subject: [PATCH 10/31] Add second type parameter for put/get and create strided datatypes on the fly --- dart-if/include/dash/dart/if/dart_types.h | 10 +- .../dash/dart/mpi/dart_communication_priv.h | 92 +- .../mpi/include/dash/dart/mpi/dart_segment.h | 4 +- dart-impl/mpi/src/dart_communication.c | 919 ++++++++++-------- dart-impl/mpi/src/dart_mpi_types.c | 95 +- 5 files changed, 665 insertions(+), 455 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_types.h b/dart-if/include/dash/dart/if/dart_types.h index cb9ae96eb..170115de9 100644 --- a/dart-if/include/dash/dart/if/dart_types.h +++ b/dart-if/include/dash/dart/if/dart_types.h @@ -692,11 +692,10 @@ dart_config_t; /** * Create a strided data type using blocks of size \c blocklen and a stride - * of \c stride. + * of \c stride. The number of elements copied using the resulting datatype + * has to be a multiple of \c blocklen. * * \param basetype The type of elements in the blocks. - * \param num_blocks The number of blocks of size \c blocklen separated by - * \c stride. * \param stride The stride between blocks. * \param blocklen The number of elements of type \c basetype in each block. * \param[out] newtype The newly created data type. @@ -708,7 +707,6 @@ dart_config_t; dart_ret_t dart_type_create_strided( dart_datatype_t basetype, - size_t num_blocks, size_t stride, size_t blocklen, dart_datatype_t * newtype); @@ -716,7 +714,9 @@ dart_type_create_strided( /** * Create an indexed data type using \c count blocks of size \c blocklen[i] - * with offsets \c offset[i] for each 0 <= i < count. + * with offsets \c offset[i] for each 0 <= i < count. The number of + * elements copied using the resulting datatype has to be a multiple of + * Sum(\c blocklen[0:i]). * * \param basetype The type of elements in the blocks. * \param count The number of blocks. diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h index f17e6f383..34877d726 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include @@ -30,25 +32,39 @@ typedef enum { } dart_type_kind_t; typedef struct dart_datatype_struct { - MPI_Datatype mpi_type; + /// the underlying data-type (type == base_type for basic types) dart_datatype_t base_type; + /// the kind of this type (basic, strided, indexed) dart_type_kind_t kind; - MPI_Datatype max_type; + /// the overall number of elements in this type + size_t num_elem; union { - // used for basic types + /// used for basic types struct { + /// the size in bytes of this type size_t size; + /// the underlying MPI type + MPI_Datatype mpi_type; + /// the underlying MPI type used to handle large (>2GB) transfers + MPI_Datatype max_type; } basic; - // used for DART_KIND_STRIDED + /// used for DART_KIND_STRIDED + /// NOTE: the underlying MPI strided type is created dynamically based on + /// the number of blocks required. struct { + /// the stride between blocks of size \c num_elem int stride; - int blocklen; } strided; - // used for DART_KIND_INDEXED + /// used for DART_KIND_INDEXED struct { + /// the underlying MPI type + MPI_Datatype mpi_type; + /// the numbers of elements in each block int * blocklens; + /// the offsets at which each block starts int * offsets; - int count; + /// the number of blocks + int num_blocks; } indexed; }; } dart_datatype_struct_t; @@ -90,11 +106,6 @@ dart_datatype_struct_t * dart__mpi__datatype_struct( : (dart_datatype_struct_t *)dart_datatype; } -DART_INLINE -MPI_Datatype dart__mpi__datatype(dart_datatype_t dart_datatype) { - return dart__mpi__datatype_struct(dart_datatype)->mpi_type; -} - DART_INLINE int dart__mpi__datatype_sizeof(dart_datatype_t dart_type) { dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); @@ -112,6 +123,16 @@ bool dart__mpi__datatype_isbasic(dart_datatype_t dart_type) { return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_BASIC); } +DART_INLINE +bool dart__mpi__datatype_isstrided(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_STRIDED); +} + +DART_INLINE +bool dart__mpi__datatype_isindexed(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_INDEXED); +} + DART_INLINE bool dart__mpi__datatype_samebase( dart_datatype_t lhs_type, @@ -123,10 +144,53 @@ bool dart__mpi__datatype_samebase( DART_INLINE MPI_Datatype dart__mpi__datatype_maxtype(dart_datatype_t dart_type) { dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); - return dts->max_type; + return (dts->kind == DART_KIND_BASIC) ? dts->basic.max_type + : dart__mpi__datatype_maxtype( + dts->base_type); } -char* dart__mpi__datatype_name(dart_datatype_t dart_type) DART_INTERNAL; +DART_INLINE +size_t dart__mpi__datatype_num_elem(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->num_elem); +} + +MPI_Datatype +dart__mpi__create_strided_mpi( + dart_datatype_t dart_type, + size_t num_blocks) DART_INTERNAL; + +void +dart__mpi__destroy_strided_mpi(MPI_Datatype *mpi_type) DART_INTERNAL; + +DART_INLINE +void +dart__mpi__datatype_convert_mpi( + dart_datatype_t dart_type, + size_t dart_num_elem, + MPI_Datatype * mpi_type, + int * mpi_num_elem) +{ + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + switch(dts->kind) { + case DART_KIND_BASIC: + *mpi_num_elem = dart_num_elem; + *mpi_type = dts->basic.mpi_type; + break; + case DART_KIND_STRIDED: + *mpi_num_elem = 1; + *mpi_type = dart__mpi__create_strided_mpi( + dart_type, dart_num_elem / dts->num_elem); + break; + case DART_KIND_INDEXED: + *mpi_num_elem = dart_num_elem / dts->num_elem; + *mpi_type = dts->indexed.mpi_type; + break; + default: + // should not happen! + DART_ASSERT_MSG(NULL, "Unknown DART type detected!"); + } +} +char* dart__mpi__datatype_name(dart_datatype_t dart_type) DART_INTERNAL; #endif /* DART_ADAPT_COMMUNICATION_PRIV_H_INCLUDED */ diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h b/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h index 52c07546d..8d977c63d 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h @@ -96,7 +96,9 @@ dart_segment_info_t * dart_segment_get_info( */ static inline MPI_Aint -dart_segment_disp(dart_segment_info_t *seginfo, dart_team_unit_t team_unit_id) +dart_segment_disp( + const dart_segment_info_t *seginfo, + dart_team_unit_t team_unit_id) { return (seginfo->disp != NULL) ? seginfo->disp[team_unit_id.id] : 0; } diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 5babcb273..501304968 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -45,14 +45,32 @@ if (dart__unlikely(!dart__mpi__datatype_samebase(_src_type, _dst_type))){ \ char *src_name = dart__mpi__datatype_name(_src_type); \ char *dst_name = dart__mpi__datatype_name(dst_type); \ - DART_LOG_ERROR("dart_get ! Cannot convert base-types (%s vs %s)", \ - src_name, dst_name); \ + DART_LOG_ERROR("%s ! Cannot convert base-types (%s vs %s)", \ + __FUNCTION__, src_name, dst_name); \ free(src_name); \ free(dst_name); \ return DART_ERR_INVAL; \ } \ } while (0) +#define CHECK_NUM_ELEM(_src_type, _dst_type, _num_elem) \ + do { \ + size_t src_num_elem = dart__mpi__datatype_num_elem(_src_type); \ + size_t dst_num_elem = dart__mpi__datatype_num_elem(_dst_type); \ + if ((_num_elem % src_num_elem) != 0 || (_num_elem % dst_num_elem) != 0) { \ + char *src_name = dart__mpi__datatype_name(_src_type); \ + char *dst_name = dart__mpi__datatype_name(dst_type); \ + DART_LOG_ERROR("%s ! Type-mismatch would lead to truncation (%s vs %s)",\ + __FUNCTION__, src_name, dst_name); \ + free(src_name); \ + free(dst_name); \ + } \ + } while (0) + +#define CHECK_TYPE_CONSTRAINTS(_src_type, _dst_type, _num_elem) \ + CHECK_EQUAL_BASETYPE(_src_type, _dst_type); \ + CHECK_NUM_ELEM(_src_type, _dst_type, _num_elem); + #define CHECK_IS_BASICTYPE(_dtype) \ do { \ if (dart__unlikely(!dart__mpi__datatype_isbasic(_dtype))) { \ @@ -142,6 +160,380 @@ static dart_ret_t put_shared_mem( } #endif // !defined(DART_MPI_DISABLE_SHARED_WINDOWS) +/** + * Internal implementations of put/get with and without handles for + * basic data types and complex data types. + */ + +static inline +dart_ret_t +dart__mpi__get_basic( + const dart_team_data_t * team_data, + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + void * dest, + uint64_t offset, + size_t nelem, + dart_datatype_t dtype, + MPI_Request * reqs, + uint8_t * num_reqs) +{ + if (num_reqs) *num_reqs = 0; + + if (team_data->unitid == team_unit_id.id) { + // use direct memcpy if we are on the same unit + memcpy(dest, seginfo->selfbaseptr + offset, + nelem * dart__mpi__datatype_sizeof(dtype)); + DART_LOG_DEBUG("dart_get: memcpy nelem:%zu " + "source (coll.): offset:%lu -> dest: %p", + nelem, offset, dest); + return DART_OK; + } + +#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) + DART_LOG_DEBUG("dart_get: shared windows enabled"); + if (seginfo->segid >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + return get_shared_mem(team_data, seginfo, dest, offset, + team_unit_id, nelem, dtype); + } +#else + DART_LOG_DEBUG("dart_get: shared windows disabled"); +#endif // !defined(DART_MPI_DISABLE_SHARED_WINDOWS) + + /* + * MPI uses offset type int, chunk up the get if necessary + */ + const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; + const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; + + // source on another node or shared memory windows disabled + MPI_Win win = seginfo->win; + offset += dart_segment_disp(seginfo, team_unit_id); + char * dest_ptr = (char*) dest; + + if (nchunks > 0) { + if (reqs != NULL) { + DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", + dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); + CHECK_MPI_RET( + MPI_Rget(dest_ptr, + nchunks, + dart__mpi__datatype_maxtype(dtype), + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_maxtype(dtype), + win, + &reqs[(*num_reqs)++]), + "MPI_Rget"); + } else { + DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", + dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); + CHECK_MPI_RET( + MPI_Get(dest_ptr, + nchunks, + dart__mpi__datatype_maxtype(dtype), + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_maxtype(dtype), + win), + "MPI_Get"); + } + offset += nchunks * MAX_CONTIG_ELEMENTS; + dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; + } + + if (remainder > 0) { + if (reqs != NULL) { + DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", + dest_ptr, remainder); + CHECK_MPI_RET( + MPI_Rget(dest_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win, + &reqs[(*num_reqs)++]), + "MPI_Rget"); + } else { + DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", + dest_ptr, remainder); + CHECK_MPI_RET( + MPI_Get(dest_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win), + "MPI_Get"); + } + } + return DART_OK; +} + +static inline +dart_ret_t +dart__mpi__get_complex( + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + void * dest, + uint64_t offset, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type, + MPI_Request * reqs, + uint8_t * num_reqs) +{ + if (num_reqs != NULL) *num_reqs = 0; + + CHECK_TYPE_CONSTRAINTS(src_type, dst_type, nelem); + + MPI_Win win = seginfo->win; + char * dest_ptr = (char*) dest; + offset += dart_segment_disp(seginfo, team_unit_id); + + MPI_Datatype src_mpi_type, dst_mpi_type; + int src_num_elem, dst_num_elem; + dart__mpi__datatype_convert_mpi( + src_type, nelem, &src_mpi_type, &src_num_elem); + dart__mpi__datatype_convert_mpi( + dst_type, nelem, &dst_mpi_type, &dst_num_elem); + if (reqs != NULL) { + DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", dest_ptr, nelem); + CHECK_MPI_RET( + MPI_Rget(dest_ptr, + dst_num_elem, + dst_mpi_type, + team_unit_id.id, + offset, + src_num_elem, + src_mpi_type, + win, + &reqs[(*num_reqs)++]), + "MPI_Rget"); + } else { + DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", dest_ptr, nelem); + CHECK_MPI_RET( + MPI_Get(dest_ptr, + dst_num_elem, + dst_mpi_type, + team_unit_id.id, + offset, + src_num_elem, + src_mpi_type, + win), + "MPI_Get"); + } + // clean-up strided data types + if (dart__mpi__datatype_isstrided(src_type)) { + dart__mpi__destroy_strided_mpi(&src_mpi_type); + } + if (dart__mpi__datatype_isstrided(dst_type)) { + dart__mpi__destroy_strided_mpi(&dst_mpi_type); + } + return DART_OK; +} + +static inline +dart_ret_t +dart__mpi__put_basic( + const dart_team_data_t * team_data, + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + const void * src, + uint64_t offset, + size_t nelem, + dart_datatype_t dtype, + MPI_Request * reqs, + uint8_t * num_reqs, + bool * flush_required_ptr) +{ + if (num_reqs) *num_reqs = 0; + + /* copy data directly if we are on the same unit */ + if (team_unit_id.id == team_data->unitid) { + if (flush_required_ptr) *flush_required_ptr = false; + memcpy(seginfo->selfbaseptr + offset, src, + nelem * dart__mpi__datatype_sizeof(dtype)); + DART_LOG_DEBUG("dart_put: memcpy nelem:%zu (from global allocation)" + "offset: %"PRIu64"", nelem, offset); + return DART_OK; + } + +#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) + DART_LOG_DEBUG("dart_put: shared windows enabled"); + if (seginfo->segid >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + if (flush_required_ptr) *flush_required_ptr = false; + return put_shared_mem(team_data, seginfo, src, offset, + team_unit_id, nelem, dtype); + } +#else + DART_LOG_DEBUG("dart_put: shared windows disabled"); +#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ + + if (flush_required_ptr) *flush_required_ptr = true; + + // source on another node or shared memory windows disabled + MPI_Win win = seginfo->win; + offset += dart_segment_disp(seginfo, team_unit_id); + const char * src_ptr = (const char*) src; + + // chunk up the put + const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; + const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; + + if (nchunks > 0) { + if (reqs != NULL) { + DART_LOG_TRACE("dart_put: MPI_Rput (src %p, size %zu)", + src_ptr, nchunks * MAX_CONTIG_ELEMENTS); + CHECK_MPI_RET( + MPI_Rput(src_ptr, + nchunks, + dart__mpi__datatype_struct(dtype)->basic.max_type, + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_struct(dtype)->basic.max_type, + win, + &reqs[(*num_reqs)++]), + "MPI_Rput"); + } else { + DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", + src_ptr, nchunks * MAX_CONTIG_ELEMENTS); + CHECK_MPI_RET( + MPI_Put(src_ptr, + nchunks, + dart__mpi__datatype_struct(dtype)->basic.max_type, + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_struct(dtype)->basic.max_type, + win), + "MPI_Put"); + } + offset += nchunks * MAX_CONTIG_ELEMENTS; + src_ptr += nchunks * MAX_CONTIG_ELEMENTS; + } + + if (remainder > 0) { + DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", src_ptr, remainder); + + if (reqs != NULL) { + DART_LOG_TRACE("dart_put: MPI_Rput (src %p, size %zu)", + src_ptr, remainder); + CHECK_MPI_RET( + MPI_Rput(src_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win, + &reqs[(*num_reqs)++]), + "MPI_Rput"); + } else { + DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", + src_ptr, remainder); + CHECK_MPI_RET( + MPI_Put(src_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win), + "MPI_Put"); + } + } + return DART_OK; +} + +static inline +dart_ret_t +dart__mpi__put_complex( + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + const void * src, + uint64_t offset, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type, + MPI_Request * reqs, + uint8_t * num_reqs, + bool * flush_required_ptr) +{ + if (flush_required_ptr) *flush_required_ptr = true; + if (num_reqs) *num_reqs = 0; + + // slow path for derived types + CHECK_TYPE_CONSTRAINTS(src_type, dst_type, nelem); + + MPI_Win win = seginfo->win; + const char * src_ptr = (const char*) src; + offset += dart_segment_disp(seginfo, team_unit_id); + + MPI_Datatype src_mpi_type, dst_mpi_type; + int src_num_elem, dst_num_elem; + dart__mpi__datatype_convert_mpi( + src_type, nelem, &src_mpi_type, &src_num_elem); + dart__mpi__datatype_convert_mpi( + dst_type, nelem, &dst_mpi_type, &dst_num_elem); + + if (reqs != NULL) { + + DART_LOG_TRACE( + "dart_put: MPI_Rput (src %p, size %zu, src_type %p, dst_type %p)", + src_ptr, nelem, src_mpi_type, dst_mpi_type); + + CHECK_MPI_RET( + MPI_Rput(src_ptr, + src_num_elem, + src_mpi_type, + team_unit_id.id, + offset, + dst_num_elem, + dst_mpi_type, + win, + &reqs[(*num_reqs)++]), + "MPI_Rput"); + + } else { + + DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", src_ptr, nelem); + + CHECK_MPI_RET( + MPI_Put(src_ptr, + src_num_elem, + src_mpi_type, + team_unit_id.id, + offset, + dst_num_elem, + dst_mpi_type, + win), + "MPI_Put"); + + } + // clean-up strided data types + if (dart__mpi__datatype_isstrided(src_type)) { + dart__mpi__destroy_strided_mpi(&src_mpi_type); + } + if (dart__mpi__datatype_isstrided(dst_type)) { + dart__mpi__destroy_strided_mpi(&dst_mpi_type); + } + return DART_OK; +} + +/** + * Public interface for put/get. + */ dart_ret_t dart_get( void * dest, @@ -155,14 +547,11 @@ dart_ret_t dart_get( dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); dart_team_t teamid = gptr.teamid; - CHECK_EQUAL_BASETYPE(src_type, dst_type); - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_get ! failed: Unknown team %i!", teamid); return DART_ERR_INVAL; } - CHECK_UNITID_RANGE(team_unit_id, team_data); DART_LOG_DEBUG("dart_get() uid:%d o:%"PRIu64" s:%d t:%d nelem:%zu", @@ -176,75 +565,23 @@ dart_ret_t dart_get( return DART_ERR_INVAL; } + dart_ret_t ret = DART_OK; + // leave complex data type handling to MPI if (dart__mpi__datatype_isbasic(src_type) && dart__mpi__datatype_isbasic(dst_type)) { - if (team_data->unitid == team_unit_id.id) { - // use direct memcpy if we are on the same unit - memcpy(dest, seginfo->selfbaseptr + offset, - nelem * dart__mpi__datatype_sizeof(src_type)); - DART_LOG_DEBUG("dart_get: memcpy nelem:%zu " - "source (coll.): offset:%lu -> dest: %p", - nelem, offset, dest); - return DART_OK; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, src_type); - } -#else - DART_LOG_DEBUG("dart_get: shared windows disabled"); -#endif // !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - } - - /* - * MPI uses offset type int, do not copy more than INT_MAX elements: - */ - // chunk up the get - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - char * dest_ptr = (char*) dest; - - // source on another node or shared memory windows disabled - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - if (nchunks > 0) { - DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Get(dest_ptr, - nchunks, - dart__mpi__datatype_maxtype(src_type), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dst_type), - win), - "MPI_Get"); - offset += nchunks * MAX_CONTIG_ELEMENTS; - dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; - } - - if (remainder > 0) { - DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", dest_ptr, remainder); - CHECK_MPI_RET( - MPI_Get(dest_ptr, - remainder, - dart__mpi__datatype(src_type), - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype(dst_type), - win), - "MPI_Get"); + // fast-path for basic types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__get_basic(team_data, team_unit_id, seginfo, dest, + offset, nelem, src_type, NULL, NULL); + } else { + // slow path for derived types + ret = dart__mpi__get_complex(team_unit_id, seginfo, dest, + offset, nelem, src_type, dst_type, NULL, NULL); } DART_LOG_DEBUG("dart_get > finished"); - return DART_OK; + return ret; } dart_ret_t dart_put( @@ -277,70 +614,23 @@ dart_ret_t dart_put( return DART_ERR_INVAL; } - // leave complex data type handling to MPI + dart_ret_t ret = DART_OK; + if (dart__mpi__datatype_isbasic(src_type) && dart__mpi__datatype_isbasic(dst_type)) { - /* copy data directly if we are on the same unit */ - if (team_unit_id.id == team_data->unitid) { - memcpy(seginfo->selfbaseptr + offset, src, - nelem * dart__mpi__datatype_sizeof(src_type)); - DART_LOG_DEBUG("dart_put: memcpy nelem:%zu (from global allocation)" - "offset: %"PRIu64"", nelem, offset); - return DART_OK; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_put: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return put_shared_mem(team_data, seginfo, src, offset, - team_unit_id, nelem, src_type); - } -#else - DART_LOG_DEBUG("dart_put: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - } - // source on another node or shared memory windows disabled - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - // chunk up the put - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - const char * src_ptr = (const char*) src; - - if (nchunks > 0) { - DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Put(src_ptr, - nchunks, - dart__mpi__datatype_maxtype(src_type), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dst_type), - win), - "MPI_Put"); - offset += nchunks * MAX_CONTIG_ELEMENTS; - src_ptr += nchunks * MAX_CONTIG_ELEMENTS; - } - - if (remainder > 0) { - DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", src_ptr, remainder); - - CHECK_MPI_RET( - MPI_Put(src_ptr, - remainder, - dart__mpi__datatype(src_type), - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype(dst_type), - win), - "MPI_Put"); + // fast path for basic data types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__put_basic(team_data, team_unit_id, seginfo, src, + offset, nelem, src_type, + NULL, NULL, NULL); + } else { + // slow path for complex data types + ret = dart__mpi__put_complex(team_unit_id, seginfo, src, + offset, nelem, src_type, dst_type, + NULL, NULL, NULL); } - return DART_OK; + return ret; } dart_ret_t dart_accumulate( @@ -356,9 +646,12 @@ dart_ret_t dart_accumulate( uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; - mpi_dtype = dart__mpi__datatype(dtype); + + CHECK_IS_BASICTYPE(dtype); + mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; mpi_op = dart__mpi__op(op); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_accumulate ! failed: Unknown team %i!", teamid); @@ -440,10 +733,10 @@ dart_ret_t dart_fetch_and_op( uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; - mpi_dtype = dart__mpi__datatype(dtype); - mpi_op = dart__mpi__op(op); CHECK_IS_BASICTYPE(dtype); + mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; + mpi_op = dart__mpi__op(op); dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { @@ -496,13 +789,13 @@ dart_ret_t dart_compare_and_swap( uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); if (dtype > DART_TYPE_LONGLONG) { DART_LOG_ERROR("dart_compare_and_swap ! failed: " "only valid on integral types"); return DART_ERR_INVAL; } + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); if (team_data == NULL) { @@ -558,8 +851,6 @@ dart_ret_t dart_get_handle( *handleptr = DART_HANDLE_NULL; - CHECK_EQUAL_BASETYPE(src_type, dst_type); - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_get_handle ! failed: Unknown team %i!", teamid); @@ -576,89 +867,40 @@ dart_ret_t dart_get_handle( return DART_ERR_INVAL; } - DART_LOG_DEBUG("dart_get_handle() uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", - team_unit_id.id, offset, seg_id, gptr.teamid, nelem); - DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(*handleptr)); - - // leave complex data type handling to MPI - if (dart__mpi__datatype_isbasic(src_type) && - dart__mpi__datatype_isbasic(dst_type)) { -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_handle: shared windows enabled"); - - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_ret_t ret = get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, src_type); - // return NULL request - return ret; - } -#else - DART_LOG_DEBUG("dart_get_handle: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - } - - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_RGet: - */ - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - // chunk up the get - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - char * dest_ptr = (char*) dest; + MPI_Win win = seginfo->win; dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); handle->dest = team_unit_id.id; handle->win = win; handle->needs_flush = false; - if (nchunks > 0) { - DART_LOG_TRACE("dart_get_handle: MPI_Rget (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); - if (MPI_Rget(dest_ptr, - nchunks, - dart__mpi__datatype_maxtype(src_type), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dst_type), - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_get_handle ! MPI_Rget failed"); - return DART_ERR_INVAL; - } - offset += nchunks * MAX_CONTIG_ELEMENTS; - dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; - } - if (remainder > 0) { - DART_LOG_TRACE( - "dart_get_handle: MPI_Rget (dest %p, size %zu)", dest_ptr, remainder); - if (MPI_Rget(dest_ptr, - remainder, - dart__mpi__datatype(src_type), - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype(dst_type), - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_get_handle ! MPI_Rget failed"); - return DART_ERR_INVAL; - } + DART_LOG_DEBUG("dart_get_handle() uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", + team_unit_id.id, offset, seg_id, gptr.teamid, nelem); + DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(handle)); + + dart_ret_t ret = DART_OK; + + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast-path for basic types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__get_basic(team_data, team_unit_id, seginfo, dest, + offset, nelem, src_type, + handle->reqs, &handle->num_reqs); + } else { + // slow path for derived types + ret = dart__mpi__get_complex(team_unit_id, seginfo, dest, + offset, nelem, src_type, dst_type, + handle->reqs, &handle->num_reqs); } *handleptr = handle; - DART_LOG_TRACE("dart_get_handle > handle(%p) dest:%d win:%"PRIu64, - (void*)(handle), handle->dest, - (unsigned long)win); - return DART_OK; + DART_LOG_TRACE("dart_get_handle > handle(%p) dest:%d", + (void*)(handle), handle->dest); + return ret; } dart_ret_t dart_put_handle( @@ -695,58 +937,39 @@ dart_ret_t dart_put_handle( } MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); // chunk up the put dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); handle->dest = team_unit_id.id; handle->win = win; handle->needs_flush = true; - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - const char * src_ptr = (const char*) src; - if (nchunks > 0) { - DART_LOG_TRACE("dart_put_handle: MPI_Rput (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - if (MPI_Rput(src_ptr, - nchunks, - dart__mpi__datatype_maxtype(src_type), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dst_type), - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_put_handle ! MPI_Rput failed"); - return DART_ERR_INVAL; - } - src_ptr += nchunks * MAX_CONTIG_ELEMENTS; - offset += nchunks * MAX_CONTIG_ELEMENTS; - } + dart_ret_t ret = DART_OK; - if (remainder > 0) { - DART_LOG_TRACE( - "dart_put_handle: MPI_Rput (src %p, size %zu)", src_ptr, remainder); - if (MPI_Rput(src_ptr, - remainder, - dart__mpi__datatype(src_type), - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype(dst_type), - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_put_handle ! MPI_Put failed"); - return DART_ERR_INVAL; - } + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast path for basic data types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__put_basic(team_data, team_unit_id, seginfo, src, + offset, nelem, src_type, + handle->reqs, + &handle->num_reqs, + &handle->needs_flush); + } else { + // slow path for complex data types + ret = dart__mpi__put_complex(team_unit_id, seginfo, src, + offset, nelem, src_type, dst_type, + handle->reqs, + &handle->num_reqs, + &handle->needs_flush); } *handleptr = handle; - return DART_OK; + DART_LOG_TRACE("dart_put_handle > handle(%p) dest:%d", + (void*)(handle), handle->dest); + + return ret; } /* -- Blocking dart one-sided operations -- */ @@ -787,79 +1010,32 @@ dart_ret_t dart_put_blocking( DART_LOG_DEBUG("dart_put_blocking() uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", team_unit_id.id, offset, seg_id, gptr.teamid, nelem); + MPI_Win win = seginfo->win; + + dart_ret_t ret = DART_OK; + bool needs_flush = false; - // leave complex data type handling to MPI if (dart__mpi__datatype_isbasic(src_type) && dart__mpi__datatype_isbasic(dst_type)) { - /* copy data directly if we are on the same unit */ - if (team_unit_id.id == team_data->unitid) { - memcpy(seginfo->selfbaseptr + offset, src, - nelem * dart__mpi__datatype_sizeof(src_type)); - DART_LOG_DEBUG("dart_put_blocking: memcpy nelem:%zu (from global allocation)" - "offset: %"PRIu64"", nelem, offset); - return DART_OK; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_put_blocking: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return put_shared_mem(team_data, seginfo, src, offset, - team_unit_id, nelem, src_type); - } -#else - DART_LOG_DEBUG("dart_put_blocking: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ + // fast path for basic data types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__put_basic(team_data, team_unit_id, seginfo, src, + offset, nelem, src_type, + NULL, NULL, &needs_flush); + } else { + // slow path for complex data types + ret = dart__mpi__put_complex(team_unit_id, seginfo, src, + offset, nelem, src_type, dst_type, + NULL, NULL, &needs_flush); } - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - /* - * Using MPI_Put as MPI_Win_flush is required to ensure remote completion. - */ - - // chunk up the put - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - const char * src_ptr = (const char*) src; - - if (nchunks > 0) { - DART_LOG_TRACE("dart_put_blocking: MPI_Put (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Put(src_ptr, - nchunks, - dart__mpi__datatype_maxtype(src_type), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dst_type), - win), - "MPI_Put"); - src_ptr += nchunks * MAX_CONTIG_ELEMENTS; - offset += nchunks * MAX_CONTIG_ELEMENTS; + if (ret == DART_OK && needs_flush) { + DART_LOG_DEBUG("dart_put_blocking: MPI_Win_flush"); + CHECK_MPI_RET(MPI_Win_flush(team_unit_id.id, win), "MPI_Win_flush"); } - if (remainder > 0) { - DART_LOG_TRACE( - "dart_put_blocking: MPI_Put (src %p, size %zu)", src_ptr, remainder); - CHECK_MPI_RET( - MPI_Put(src_ptr, - remainder, - dart__mpi__datatype(src_type), - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype(dst_type), - win), - "MPI_Put"); - } - - DART_LOG_DEBUG("dart_put_blocking: MPI_Win_flush"); - CHECK_MPI_RET(MPI_Win_flush(team_unit_id.id, win), "MPI_Win_flush"); - DART_LOG_DEBUG("dart_put_blocking > finished"); - return DART_OK; + return ret; } /** @@ -900,86 +1076,31 @@ dart_ret_t dart_get_blocking( return DART_ERR_INVAL; } + dart_ret_t ret = DART_OK; + + MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; + uint8_t num_reqs = 0; + // leave complex data type handling to MPI if (dart__mpi__datatype_isbasic(src_type) && dart__mpi__datatype_isbasic(dst_type)) { - if (team_data->unitid == team_unit_id.id) { - // use direct memcpy if we are on the same unit - memcpy(dest, seginfo->selfbaseptr + offset, - nelem * dart__mpi__datatype_sizeof(src_type)); - DART_LOG_DEBUG("dart_get_blocking: memcpy nelem:%zu " - "source (coll.): offset:%lu -> dest: %p", - nelem, offset, dest); - return DART_OK; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, src_type); - } -#else - DART_LOG_DEBUG("dart_get_blocking: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - } - - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_Rget: - */ - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - /* - * Using MPI_Get as MPI_Win_flush is required to ensure remote completion. - */ - // chunk up the get - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - char * dest_ptr = (char*) dest; - MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; - int nreqs = 0; - - if (nchunks > 0) { - DART_LOG_TRACE("dart_get_blocking: MPI_Rget (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Rget(dest_ptr, - nchunks, - dart__mpi__datatype_maxtype(src_type), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dst_type), - win, - &reqs[nreqs++]), - "MPI_Rget"); - offset += nchunks * MAX_CONTIG_ELEMENTS; - dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; + // fast-path for basic types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__get_basic(team_data, team_unit_id, seginfo, dest, + offset, nelem, src_type, + reqs, &num_reqs); + } else { + // slow path for derived types + ret = dart__mpi__get_complex(team_unit_id, seginfo, dest, + offset, nelem, src_type, dst_type, + reqs, &num_reqs); } - if (remainder > 0) { - DART_LOG_TRACE( - "dart_get_blocking: MPI_Rget (dest %p, size %zu)", dest_ptr, remainder); - + if (ret == DART_OK && num_reqs > 0) { CHECK_MPI_RET( - MPI_Rget(dest_ptr, - remainder, - dart__mpi__datatype(src_type), - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype(dst_type), - win, - &reqs[nreqs++]), - "MPI_Rget"); + MPI_Waitall(num_reqs, reqs, MPI_STATUSES_IGNORE), "MPI_Waitall"); } - CHECK_MPI_RET( - MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE), "MPI_Waitall"); - DART_LOG_DEBUG("dart_get_blocking > finished"); return DART_OK; } @@ -1551,7 +1672,7 @@ dart_ret_t dart_bcast( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Bcast(src_ptr, remainder, mpi_dtype, root.id, comm), "MPI_Bcast"); @@ -1606,7 +1727,7 @@ dart_ret_t dart_scatter( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Scatter( send_ptr, @@ -1670,7 +1791,7 @@ dart_ret_t dart_gather( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Gather( send_ptr, @@ -1734,7 +1855,7 @@ dart_ret_t dart_allgather( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Allgather( send_ptr, @@ -1805,7 +1926,7 @@ dart_ret_t dart_allgatherv( irecvdispls[i] = recvdispls[i]; } - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; if (MPI_Allgatherv( sendbuf, nsendelem, @@ -1840,7 +1961,7 @@ dart_ret_t dart_allreduce( CHECK_IS_BASICTYPE(dtype); MPI_Op mpi_op = dart__mpi__op(op); - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; /* * MPI uses offset type int, do not copy more than INT_MAX elements: @@ -1878,11 +1999,9 @@ dart_ret_t dart_reduce( dart_team_t team) { MPI_Comm comm; - MPI_Op mpi_op = dart__mpi__op(op); - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - CHECK_IS_BASICTYPE(dtype); - + MPI_Op mpi_op = dart__mpi__op(op); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; /* * MPI uses offset type int, do not copy more than INT_MAX elements: */ @@ -1921,7 +2040,8 @@ dart_ret_t dart_send( dart_global_unit_t unit) { MPI_Comm comm; - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + CHECK_IS_BASICTYPE(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; dart_team_t team = DART_TEAM_ALL; /* @@ -1962,7 +2082,8 @@ dart_ret_t dart_recv( dart_global_unit_t unit) { MPI_Comm comm; - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + CHECK_IS_BASICTYPE(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; dart_team_t team = DART_TEAM_ALL; /* @@ -2009,8 +2130,12 @@ dart_ret_t dart_sendrecv( dart_global_unit_t src) { MPI_Comm comm; - MPI_Datatype mpi_send_dtype = dart__mpi__datatype(send_dtype); - MPI_Datatype mpi_recv_dtype = dart__mpi__datatype(recv_dtype); + CHECK_IS_BASICTYPE(send_dtype); + CHECK_IS_BASICTYPE(recv_dtype); + MPI_Datatype mpi_send_dtype = + dart__mpi__datatype_struct(send_dtype)->basic.mpi_type; + MPI_Datatype mpi_recv_dtype = + dart__mpi__datatype_struct(recv_dtype)->basic.mpi_type; dart_team_t team = DART_TEAM_ALL; /* diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c index 1492de1ce..fffa44554 100644 --- a/dart-impl/mpi/src/dart_mpi_types.c +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -57,13 +57,6 @@ create_max_datatype(MPI_Datatype mpi_type) return max_type; } -static void -destroy_max_type(dart_datatype_struct_t *dart_type) -{ - MPI_Type_free(&dart_type->max_type); - dart_type->max_type = MPI_DATATYPE_NULL; -} - static void init_basic_datatype( dart_datatype_t dart_type_id, @@ -71,10 +64,11 @@ init_basic_datatype( { int size; struct dart_datatype_struct *dart_type = &__dart_base_types[dart_type_id]; - dart_type->base_type = DART_TYPE_UNDEFINED; - dart_type->mpi_type = mpi_type; - dart_type->kind = DART_KIND_BASIC; - dart_type->basic.size = 0; + dart_type->base_type = dart_type_id; + dart_type->basic.mpi_type = mpi_type; + dart_type->kind = DART_KIND_BASIC; + dart_type->basic.size = 0; + dart_type->num_elem = 0; if (mpi_type != MPI_DATATYPE_NULL) { int ret = MPI_Type_size(mpi_type, &size); if (ret != MPI_SUCCESS) { @@ -83,8 +77,11 @@ init_basic_datatype( } dart_type->basic.size = size; + // basic types only represent a single element + dart_type->num_elem = 1; + // create the type used for large transfers - dart_type->max_type = create_max_datatype(mpi_type); + dart_type->basic.max_type = create_max_datatype(mpi_type); } } @@ -116,13 +113,13 @@ char* dart__mpi__datatype_name(dart_datatype_t dart_type) buf = malloc(DART_TYPE_NAMELEN); char *base_name = dart__mpi__datatype_name(dts->base_type); snprintf(buf, DART_TYPE_NAMELEN, "INDEXED(%i:%s)", - dts->indexed.count, base_name); + dts->indexed.num_blocks, base_name); free(base_name); } else if (dts->kind == DART_KIND_STRIDED){ buf = malloc(DART_TYPE_NAMELEN); char *base_name = dart__mpi__datatype_name(dts->base_type); - snprintf(buf, DART_TYPE_NAMELEN, "STRIDED(%i:%i:%s)", - dts->strided.blocklen, dts->strided.stride, base_name); + snprintf(buf, DART_TYPE_NAMELEN, "STRIDED(%zu:%i:%s)", + dts->num_elem, dts->strided.stride, base_name); free(base_name); } else { DART_LOG_ERROR("INVALID data type detected!"); @@ -134,7 +131,6 @@ char* dart__mpi__datatype_name(dart_datatype_t dart_type) dart_ret_t dart_type_create_strided( dart_datatype_t basetype_id, - size_t num_blocks, size_t stride, size_t blocklen, dart_datatype_t * newtype) @@ -154,33 +150,53 @@ dart_type_create_strided( return DART_ERR_INVAL; } - if (num_blocks > INT_MAX || stride > INT_MAX || blocklen > INT_MAX) { + if (stride > INT_MAX || blocklen > INT_MAX) { DART_LOG_ERROR("dart_type_create_strided: arguments out of range (>INT_MAX)"); return DART_ERR_INVAL; } - MPI_Datatype mpi_dtype = basetype->mpi_type; - MPI_Datatype new_mpi_dtype; - MPI_Type_vector(num_blocks, blocklen, stride, mpi_dtype, &new_mpi_dtype); - MPI_Type_commit(&new_mpi_dtype); + //MPI_Datatype new_mpi_dtype; + //MPI_Type_vector(num_blocks, blocklen, stride, mpi_dtype, &new_mpi_dtype); + //MPI_Type_commit(&new_mpi_dtype); dart_datatype_struct_t *new_struct; new_struct = malloc(sizeof(struct dart_datatype_struct)); - new_struct->mpi_type = new_mpi_dtype; new_struct->base_type = basetype_id; new_struct->kind = DART_KIND_STRIDED; - new_struct->strided.blocklen = blocklen; + new_struct->num_elem = blocklen; new_struct->strided.stride = stride; - new_struct->max_type = create_max_datatype(new_mpi_dtype); *newtype = (dart_datatype_t)new_struct; - DART_LOG_TRACE("Created new strided data type %p (mpi_type %p)", - new_struct, new_mpi_dtype); + DART_LOG_TRACE("Created new strided data type %p", new_struct); return DART_OK; } +MPI_Datatype +dart__mpi__create_strided_mpi( + dart_datatype_t dart_type, + size_t num_blocks) +{ + MPI_Datatype new_mpi_dtype; + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + MPI_Type_vector( + num_blocks, // the number of blocks + dts->num_elem, // the number of elements per block + dts->strided.stride, // the number of elements between start of each block + dart__mpi__datatype_struct(dts->base_type)->basic.mpi_type, + &new_mpi_dtype); + MPI_Type_commit(&new_mpi_dtype); + printf("Created new strided MPI type %p (%d, %d, %d)\n", new_mpi_dtype, num_blocks, dts->num_elem, dts->strided.stride); + return new_mpi_dtype; +} + +void +dart__mpi__destroy_strided_mpi(MPI_Datatype *mpi_type) +{ + MPI_Type_free(mpi_type); +} + dart_ret_t dart_type_create_indexed( dart_datatype_t basetype, @@ -195,8 +211,8 @@ dart_type_create_indexed( } *newtype = DART_TYPE_UNDEFINED; - - if (dart__mpi__datatype_struct(basetype)->kind != DART_KIND_BASIC) { + dart_datatype_struct_t *basetype_struct = dart__mpi__datatype_struct(basetype); + if (basetype_struct->kind != DART_KIND_BASIC) { DART_LOG_ERROR("Only basic data types allowed in indexed datatypes!"); return DART_ERR_INVAL; } @@ -210,6 +226,7 @@ dart_type_create_indexed( int *mpi_blocklen = malloc(sizeof(int) * count); int *mpi_disps = malloc(sizeof(int) * count); + size_t num_elem = 0; for (size_t i = 0; i < count; ++i) { if (blocklen[i] > INT_MAX) { DART_LOG_ERROR("dart_type_create_indexed: blocklen[%zu] > INT_MAX", i); @@ -225,9 +242,10 @@ dart_type_create_indexed( } mpi_blocklen[i] = blocklen[i]; mpi_disps[i] = offset[i]; + num_elem += blocklen[i]; } - MPI_Datatype mpi_base_type = dart__mpi__datatype_struct(basetype)->mpi_type; + MPI_Datatype mpi_base_type = basetype_struct->basic.mpi_type; MPI_Datatype new_mpi_dtype; int ret = MPI_Type_indexed( count, mpi_blocklen, mpi_disps, mpi_base_type, &new_mpi_dtype); @@ -242,16 +260,17 @@ dart_type_create_indexed( dart_datatype_struct_t *new_struct; new_struct = malloc(sizeof(struct dart_datatype_struct)); new_struct->base_type = basetype; - new_struct->mpi_type = new_mpi_dtype; new_struct->kind = DART_KIND_INDEXED; - new_struct->indexed.blocklens = mpi_blocklen; - new_struct->indexed.offsets = mpi_disps; - new_struct->max_type = create_max_datatype(new_mpi_dtype); + new_struct->num_elem = num_elem; + new_struct->indexed.mpi_type = new_mpi_dtype; + new_struct->indexed.blocklens = mpi_blocklen; + new_struct->indexed.offsets = mpi_disps; + new_struct->indexed.num_blocks = count; *newtype = (dart_datatype_t)new_struct; - DART_LOG_TRACE("Created new indexed data type %p (mpi_type %p)", - new_struct, new_mpi_dtype); + DART_LOG_TRACE("Created new indexed data type %p (mpi_type %p) with %zu elements", + new_struct, new_mpi_dtype, num_elem); return DART_OK; } @@ -275,10 +294,9 @@ dart_type_destroy(dart_datatype_t *dart_type_ptr) dart_type->indexed.blocklens = NULL; free(dart_type->indexed.offsets); dart_type->indexed.offsets = NULL; + MPI_Type_free(&dart_type->indexed.mpi_type); } - destroy_max_type(dart_type); - free(dart_type); *dart_type_ptr = DART_TYPE_UNDEFINED; @@ -288,7 +306,8 @@ dart_type_destroy(dart_datatype_t *dart_type_ptr) static void destroy_basic_type(dart_datatype_t dart_type_id) { dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(dart_type_id); - destroy_max_type(dart_type); + MPI_Type_free(&dart_type->basic.max_type); + dart_type->basic.max_type = MPI_DATATYPE_NULL; } dart_ret_t From 40d4b79af086134774d9adfd89e826a89928e7f0 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 17 Nov 2017 19:27:18 +0900 Subject: [PATCH 11/31] Add first test case for strided get --- dash/test/dart/DARTDatatypesTest.cc | 75 +++++++++++++++++++++++++++++ dash/test/dart/DARTDatatypesTest.h | 21 ++++++++ 2 files changed, 96 insertions(+) create mode 100644 dash/test/dart/DARTDatatypesTest.cc create mode 100644 dash/test/dart/DARTDatatypesTest.h diff --git a/dash/test/dart/DARTDatatypesTest.cc b/dash/test/dart/DARTDatatypesTest.cc new file mode 100644 index 000000000..59ceb01b3 --- /dev/null +++ b/dash/test/dart/DARTDatatypesTest.cc @@ -0,0 +1,75 @@ + +#include +#include + +#include "DARTDatatypesTest.h" + +#include +#include +#include + +TEST_F(DARTDatatypesTest, StridedSimple) { + constexpr size_t num_elem_per_unit = 100; + constexpr size_t stride_size = 2; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } + + dash::barrier(); + + /** + * Create a strided type and fetch elements from our neighbor. + * Data: 0 1 2 3 ... 10 11 12 ... 90 91 92 93 ... 99 + * Result: 0 10 20 ... 90 + */ + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride_size, 1, &new_type); + + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + + // global-to-local strided-to-contig + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + gptr.unitid = neighbor; + dart_get_blocking(buf, gptr, num_elem_per_unit / stride_size, + new_type, DART_TYPE_INT); + + // the first 50 elements should have a value + for (int i = 0; i < num_elem_per_unit / stride_size; ++i) { + ASSERT_EQ_U(i*stride_size, buf[i]); + } + + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_get_blocking(buf, gptr, num_elem_per_unit / stride_size, + DART_TYPE_INT, new_type); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%2 == 0) { + ASSERT_EQ_U(i/2, buf[i]); + } else { + ASSERT_EQ_U(0, buf[i]); + } + } + + dash::barrier(); + + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); + + delete[] buf; + + dart_type_destroy(&new_type); + +} + diff --git a/dash/test/dart/DARTDatatypesTest.h b/dash/test/dart/DARTDatatypesTest.h new file mode 100644 index 000000000..01388c635 --- /dev/null +++ b/dash/test/dart/DARTDatatypesTest.h @@ -0,0 +1,21 @@ +#ifndef DASH__TEST__DART_TYPES_TEST_H_ +#define DASH__TEST__DART_TYPES_TEST_H_ + +#include "../TestBase.h" + +/** + * Test fixture for onesided operations provided by DART. + */ +class DARTDatatypesTest : public dash::test::TestBase { +protected: + size_t _dash_id = 0; + size_t _dash_size = 0; + + virtual void SetUp() { + dash::test::TestBase::SetUp(); + _dash_id = dash::myid(); + _dash_size = dash::size(); + } +}; + +#endif // DASH__TEST__DART_TYPES_TEST_H_ From 2d7b768de02966b29aa4fcdbe06638e352ffe0ec Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 18 Nov 2017 11:31:42 +0900 Subject: [PATCH 12/31] Add test cases for strided put and BlockedStridedToBlocked --- dash/test/dart/DARTDatatypesTest.cc | 185 +++++++++++++++++++++++----- 1 file changed, 154 insertions(+), 31 deletions(-) diff --git a/dash/test/dart/DARTDatatypesTest.cc b/dash/test/dart/DARTDatatypesTest.cc index 59ceb01b3..b87ccd3c1 100644 --- a/dash/test/dart/DARTDatatypesTest.cc +++ b/dash/test/dart/DARTDatatypesTest.cc @@ -8,9 +8,9 @@ #include #include -TEST_F(DARTDatatypesTest, StridedSimple) { - constexpr size_t num_elem_per_unit = 100; - constexpr size_t stride_size = 2; +TEST_F(DARTDatatypesTest, StridedGetSimple) { + constexpr size_t num_elem_per_unit = 120; + constexpr size_t max_stride_size = 5; dart_gptr_t gptr; int *local_ptr; @@ -23,53 +23,176 @@ TEST_F(DARTDatatypesTest, StridedSimple) { } dash::barrier(); + int *buf = new int[num_elem_per_unit]; + + for (int stride = 1; stride <= max_stride_size; stride++) { + + LOG_MESSAGE("Testing GET with stride %i", stride); + + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); + + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + gptr.unitid = neighbor; + dart_get_blocking(buf, gptr, num_elem_per_unit / stride, + new_type, DART_TYPE_INT); + + // the first 50 elements should have a value + for (int i = 0; i < num_elem_per_unit / stride; ++i) { + ASSERT_EQ_U(i*stride, buf[i]); + } + + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_get_blocking(buf, gptr, num_elem_per_unit / stride, + DART_TYPE_INT, new_type); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%stride == 0) { + ASSERT_EQ_U(i/stride, buf[i]); + } else { + ASSERT_EQ_U(0, buf[i]); + } + } + dart_type_destroy(&new_type); + } + + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); + + delete[] buf; - /** - * Create a strided type and fetch elements from our neighbor. - * Data: 0 1 2 3 ... 10 11 12 ... 90 91 92 93 ... 99 - * Result: 0 10 20 ... 90 - */ - dart_datatype_t new_type; - dart_type_create_strided(DART_TYPE_INT, stride_size, 1, &new_type); +} + + +TEST_F(DARTDatatypesTest, StridedPutSimple) { + constexpr size_t num_elem_per_unit = 120; + constexpr size_t max_stride_size = 5; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); - // global-to-local strided-to-contig int *buf = new int[num_elem_per_unit]; - memset(buf, 0, sizeof(int)*num_elem_per_unit); + for (int i = 0; i < num_elem_per_unit; ++i) { + buf[i] = i; + } gptr.unitid = neighbor; - dart_get_blocking(buf, gptr, num_elem_per_unit / stride_size, - new_type, DART_TYPE_INT); - // the first 50 elements should have a value - for (int i = 0; i < num_elem_per_unit / stride_size; ++i) { - ASSERT_EQ_U(i*stride_size, buf[i]); + for (int stride = 1; stride <= max_stride_size; stride++) { + + LOG_MESSAGE("Testing PUT with stride %i", stride); + + dash::barrier(); + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); + + // local-to-global strided-to-contig + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, + new_type, DART_TYPE_INT); + + dash::barrier(); + + // the first 50 elements should have a value + for (int i = 0; i < num_elem_per_unit / stride; ++i) { + ASSERT_EQ_U(i*stride, local_ptr[i]); + } + + // local-to-global strided-to-contig + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, + DART_TYPE_INT, new_type); + + dash::barrier(); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%stride == 0) { + ASSERT_EQ_U(i/stride, local_ptr[i]); + } else { + ASSERT_EQ_U(0, local_ptr[i]); + } + } + + dart_type_destroy(&new_type); } - // global-to-local strided-to-contig - memset(buf, 0, sizeof(int)*num_elem_per_unit); + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); + + delete[] buf; +} - dart_get_blocking(buf, gptr, num_elem_per_unit / stride_size, - DART_TYPE_INT, new_type); - // every other element should have a value +TEST_F(DARTDatatypesTest, BlockedStridedToStrided) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t from_stride = 5; + constexpr size_t from_block_size = 2; + constexpr size_t to_stride = 2; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); for (int i = 0; i < num_elem_per_unit; ++i) { - if (i%2 == 0) { - ASSERT_EQ_U(i/2, buf[i]); + local_ptr[i] = i; + } + + // global-to-local strided-to-contig + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_datatype_t to_type; + dart_type_create_strided(DART_TYPE_INT, to_stride, 1, &to_type); + dart_datatype_t from_type; + dart_type_create_strided(DART_TYPE_INT, from_stride, + from_block_size, &from_type); + + // strided-to-strided get + dart_get_blocking(buf, gptr, num_elem_per_unit / from_stride * from_block_size, + from_type, to_type); + + int value = 0; + for (int i = 0; + i < num_elem_per_unit/from_stride*to_stride*from_block_size; + ++i) { + if (i%to_stride == 0) { + ASSERT_EQ_U(value, buf[i]); + // consider the block size we used as source + // if + if ((value%from_stride) < (from_block_size-1)) { + // expect more elements with incremented value + ++value; + } else { + value += from_stride - (from_block_size - 1); + } } else { ASSERT_EQ_U(0, buf[i]); } } - dash::barrier(); + dart_type_destroy(&from_type); + dart_type_destroy(&to_type); + delete[] buf; // clean-up gptr.unitid = 0; dart_team_memfree(gptr); - - delete[] buf; - - dart_type_destroy(&new_type); - } - From 1a0fc192251574fb485ef4d7bec2955e96902451 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 18 Nov 2017 12:22:18 +0900 Subject: [PATCH 13/31] Add IndexedGetSimple test case --- dash/test/dart/DARTDatatypesTest.cc | 75 +++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/dash/test/dart/DARTDatatypesTest.cc b/dash/test/dart/DARTDatatypesTest.cc index b87ccd3c1..12f0970f8 100644 --- a/dash/test/dart/DARTDatatypesTest.cc +++ b/dash/test/dart/DARTDatatypesTest.cc @@ -196,3 +196,78 @@ TEST_F(DARTDatatypesTest, BlockedStridedToStrided) { gptr.unitid = 0; dart_team_memfree(gptr); } + + +TEST_F(DARTDatatypesTest, IndexedGetSimple) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks = 5; + + std::vector blocklens(num_blocks); + std::vector offsets(num_blocks); + + // set up offsets and block lengths + size_t num_elems = 0; + for (int i = 0; i < num_blocks; ++i) { + blocklens[i] = (i+1); + offsets[i] = (i*10); + num_elems += blocklens[i]; + } + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } + + dart_datatype_t new_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), + offsets.data(), &new_type); + + dash::barrier(); + + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + // indexed-to-contig + dart_get_blocking(buf, gptr, num_elems, new_type, DART_TYPE_INT); + + size_t idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(local_ptr[offsets[i] + j], buf[idx]); + ++idx; + } + } + + // check we haven't copied more elements than requested + for (size_t i = idx; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(0, buf[i]); + } + + + // contig-to-indexed + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + // indexed-to-contig + dart_get_blocking(buf, gptr, num_elems, DART_TYPE_INT, new_type); + + idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(local_ptr[idx], buf[offsets[i] + j]); + ++idx; + } + } + + dart_type_destroy(&new_type); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); +} From 41f0706a782a6e7b8fcd601e3896ad4e5bc0f109 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 18 Nov 2017 17:59:32 +0900 Subject: [PATCH 14/31] Fix indentation in dart_globmem.h --- dart-if/include/dash/dart/if/dart_globmem.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_globmem.h b/dart-if/include/dash/dart/if/dart_globmem.h index 5969b042d..3604b7059 100644 --- a/dart-if/include/dash/dart/if/dart_globmem.h +++ b/dart-if/include/dash/dart/if/dart_globmem.h @@ -318,7 +318,7 @@ dart_ret_t dart_memfree(dart_gptr_t gptr) DART_NOTHROW; */ dart_ret_t dart_team_memalloc_aligned( dart_team_t teamid, - size_t nelem, + size_t nelem, dart_datatype_t dtype, dart_gptr_t * gptr) DART_NOTHROW; @@ -362,10 +362,10 @@ dart_ret_t dart_team_memfree( */ dart_ret_t dart_team_memregister_aligned( dart_team_t teamid, - size_t nelem, + size_t nelem, dart_datatype_t dtype, - void * addr, - dart_gptr_t * gptr) DART_NOTHROW; + void * addr, + dart_gptr_t * gptr) DART_NOTHROW; /** * Collective function, attaches external memory previously allocated by @@ -386,10 +386,10 @@ dart_ret_t dart_team_memregister_aligned( */ dart_ret_t dart_team_memregister( dart_team_t teamid, - size_t nlelem, + size_t nlelem, dart_datatype_t dtype, - void * addr, - dart_gptr_t * gptr) DART_NOTHROW; + void * addr, + dart_gptr_t * gptr) DART_NOTHROW; /** * Collective function similar to dart_team_memfree() but on previously From c753db95d3111d6488ddffcd500eebed54b02712 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 18 Nov 2017 18:00:12 +0900 Subject: [PATCH 15/31] Add IndexedPutSimple test --- dash/test/dart/DARTDatatypesTest.cc | 79 ++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/dash/test/dart/DARTDatatypesTest.cc b/dash/test/dart/DARTDatatypesTest.cc index 12f0970f8..f317e2993 100644 --- a/dash/test/dart/DARTDatatypesTest.cc +++ b/dash/test/dart/DARTDatatypesTest.cc @@ -253,7 +253,6 @@ TEST_F(DARTDatatypesTest, IndexedGetSimple) { // contig-to-indexed memset(buf, 0, sizeof(int)*num_elem_per_unit); - // indexed-to-contig dart_get_blocking(buf, gptr, num_elems, DART_TYPE_INT, new_type); idx = 0; @@ -271,3 +270,81 @@ TEST_F(DARTDatatypesTest, IndexedGetSimple) { gptr.unitid = 0; dart_team_memfree(gptr); } + +TEST_F(DARTDatatypesTest, IndexedPutSimple) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks = 5; + + std::vector blocklens(num_blocks); + std::vector offsets(num_blocks); + + // set up offsets and block lengths + size_t num_elems = 0; + for (int i = 0; i < num_blocks; ++i) { + blocklens[i] = (i+1); + offsets[i] = (i*10); + num_elems += blocklens[i]; + } + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + + dart_datatype_t new_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), + offsets.data(), &new_type); + + dash::barrier(); + + int *buf = new int[num_elem_per_unit]; + for (int i = 0; i < num_elem_per_unit; ++i) { + buf[i] = i; + } + + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + // indexed-to-contig + dart_put_blocking(gptr, buf, num_elems, new_type, DART_TYPE_INT); + + dash::barrier(); + + size_t idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(buf[offsets[i] + j], local_ptr[idx]); + ++idx; + } + } + + // check we haven't copied more elements than requested + for (size_t i = idx; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(0, local_ptr[i]); + } + + // contig-to-indexed + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dash::barrier(); + + dart_put_blocking(gptr, buf, num_elems, DART_TYPE_INT, new_type); + dash::barrier(); + + idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(buf[idx], local_ptr[offsets[i] + j]); + ++idx; + } + } + + dart_type_destroy(&new_type); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); +} From 4f0d1e2f0dc90a0113d4244d034582ab08a98c2f Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 10:16:10 +0900 Subject: [PATCH 16/31] Wrap calls to MPI_Get/Rget/Put/Rput to avoid duplicating code --- dart-impl/mpi/src/dart_communication.c | 307 +++++++++++-------------- 1 file changed, 132 insertions(+), 175 deletions(-) diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 084bcc6b7..e97dbb9d1 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -107,11 +107,16 @@ struct dart_handle_struct bool needs_flush; }; +/** + * Help to check for return of MPI call. + * Since DART currently does not define an MPI error handler the abort will not + * be reached. + */ #define CHECK_MPI_RET(__call, __name) \ do { \ - if (dart__unlikely(__call != MPI_SUCCESS)) { \ + if (dart__unlikely(__call != MPI_SUCCESS)) { \ DART_LOG_ERROR("%s ! %s failed!", __func__, __name); \ - return DART_ERR_OTHER; \ + dart_abort(DART_EXIT_ABORT); \ } \ } while (0) @@ -165,6 +170,44 @@ static dart_ret_t put_shared_mem( * basic data types and complex data types. */ +static __attribute__((always_inline)) inline +int +dart__mpi__get( + void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Win win, + MPI_Request *reqs, uint8_t * num_reqs) +{ + if (reqs != NULL) { + return MPI_Rget(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, target_datatype, + win, &reqs[(*num_reqs)++]); + } else { + return MPI_Get(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, + target_datatype, win); + } +} + +static __attribute__((always_inline)) inline +int +dart__mpi__put( + const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Win win, + MPI_Request *reqs, uint8_t * num_reqs) +{ + if (reqs != NULL) { + return MPI_Rput(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, target_datatype, + win, &reqs[(*num_reqs)++]); + } else { + return MPI_Put(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, + target_datatype, win); + } +} + static inline dart_ret_t dart__mpi__get_basic( @@ -212,67 +255,34 @@ dart__mpi__get_basic( char * dest_ptr = (char*) dest; if (nchunks > 0) { - if (reqs != NULL) { - DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Rget(dest_ptr, - nchunks, - dart__mpi__datatype_maxtype(dtype), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dtype), - win, - &reqs[(*num_reqs)++]), - "MPI_Rget"); - } else { - DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Get(dest_ptr, - nchunks, - dart__mpi__datatype_maxtype(dtype), - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_maxtype(dtype), - win), - "MPI_Get"); - } + DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", + dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); + CHECK_MPI_RET( + dart__mpi__get(dest_ptr, nchunks, + dart__mpi__datatype_maxtype(dtype), + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_maxtype(dtype), + win, reqs, num_reqs), + "MPI_Get"); offset += nchunks * MAX_CONTIG_ELEMENTS; dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; } if (remainder > 0) { - if (reqs != NULL) { - DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", - dest_ptr, remainder); - CHECK_MPI_RET( - MPI_Rget(dest_ptr, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - win, - &reqs[(*num_reqs)++]), - "MPI_Rget"); - } else { - DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", - dest_ptr, remainder); - CHECK_MPI_RET( - MPI_Get(dest_ptr, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - win), - "MPI_Get"); - } + DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", + dest_ptr, remainder); + CHECK_MPI_RET( + dart__mpi__get(dest_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win, reqs, num_reqs), + "MPI_Get"); } return DART_OK; } @@ -302,39 +312,31 @@ dart__mpi__get_complex( int src_num_elem, dst_num_elem; dart__mpi__datatype_convert_mpi( src_type, nelem, &src_mpi_type, &src_num_elem); - dart__mpi__datatype_convert_mpi( - dst_type, nelem, &dst_mpi_type, &dst_num_elem); - if (reqs != NULL) { - DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", dest_ptr, nelem); - CHECK_MPI_RET( - MPI_Rget(dest_ptr, - dst_num_elem, - dst_mpi_type, - team_unit_id.id, - offset, - src_num_elem, - src_mpi_type, - win, - &reqs[(*num_reqs)++]), - "MPI_Rget"); + if (src_type != dst_type) { + dart__mpi__datatype_convert_mpi( + dst_type, nelem, &dst_mpi_type, &dst_num_elem); } else { - DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", dest_ptr, nelem); - CHECK_MPI_RET( - MPI_Get(dest_ptr, - dst_num_elem, - dst_mpi_type, - team_unit_id.id, - offset, - src_num_elem, - src_mpi_type, - win), - "MPI_Get"); + dst_mpi_type = src_mpi_type; + dst_num_elem = src_num_elem; } + + DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", dest_ptr, nelem); + CHECK_MPI_RET( + dart__mpi__get(dest_ptr, + dst_num_elem, + dst_mpi_type, + team_unit_id.id, + offset, + src_num_elem, + src_mpi_type, + win, + reqs, num_reqs), + "MPI_Rget"); // clean-up strided data types if (dart__mpi__datatype_isstrided(src_type)) { dart__mpi__destroy_strided_mpi(&src_mpi_type); } - if (dart__mpi__datatype_isstrided(dst_type)) { + if (src_type != dst_type && dart__mpi__datatype_isstrided(dst_type)) { dart__mpi__destroy_strided_mpi(&dst_mpi_type); } return DART_OK; @@ -389,34 +391,19 @@ dart__mpi__put_basic( const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; if (nchunks > 0) { - if (reqs != NULL) { - DART_LOG_TRACE("dart_put: MPI_Rput (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Rput(src_ptr, - nchunks, - dart__mpi__datatype_struct(dtype)->basic.max_type, - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_struct(dtype)->basic.max_type, - win, - &reqs[(*num_reqs)++]), - "MPI_Rput"); - } else { - DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Put(src_ptr, - nchunks, - dart__mpi__datatype_struct(dtype)->basic.max_type, - team_unit_id.id, - offset, - nchunks, - dart__mpi__datatype_struct(dtype)->basic.max_type, - win), - "MPI_Put"); - } + DART_LOG_TRACE("dart_put: MPI_Rput (src %p, size %zu)", + src_ptr, nchunks * MAX_CONTIG_ELEMENTS); + CHECK_MPI_RET( + dart__mpi__put(src_ptr, + nchunks, + dart__mpi__datatype_struct(dtype)->basic.max_type, + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_struct(dtype)->basic.max_type, + win, + reqs, num_reqs), + "MPI_Put"); offset += nchunks * MAX_CONTIG_ELEMENTS; src_ptr += nchunks * MAX_CONTIG_ELEMENTS; } @@ -424,34 +411,17 @@ dart__mpi__put_basic( if (remainder > 0) { DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", src_ptr, remainder); - if (reqs != NULL) { - DART_LOG_TRACE("dart_put: MPI_Rput (src %p, size %zu)", - src_ptr, remainder); - CHECK_MPI_RET( - MPI_Rput(src_ptr, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - win, - &reqs[(*num_reqs)++]), - "MPI_Rput"); - } else { - DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", - src_ptr, remainder); - CHECK_MPI_RET( - MPI_Put(src_ptr, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - team_unit_id.id, - offset, - remainder, - dart__mpi__datatype_struct(dtype)->basic.mpi_type, - win), - "MPI_Put"); - } + CHECK_MPI_RET( + dart__mpi__put(src_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win, + reqs, num_reqs), + "MPI_Put"); } return DART_OK; } @@ -484,48 +454,35 @@ dart__mpi__put_complex( int src_num_elem, dst_num_elem; dart__mpi__datatype_convert_mpi( src_type, nelem, &src_mpi_type, &src_num_elem); - dart__mpi__datatype_convert_mpi( - dst_type, nelem, &dst_mpi_type, &dst_num_elem); - - if (reqs != NULL) { - - DART_LOG_TRACE( - "dart_put: MPI_Rput (src %p, size %zu, src_type %p, dst_type %p)", - src_ptr, nelem, src_mpi_type, dst_mpi_type); - - CHECK_MPI_RET( - MPI_Rput(src_ptr, - src_num_elem, - src_mpi_type, - team_unit_id.id, - offset, - dst_num_elem, - dst_mpi_type, - win, - &reqs[(*num_reqs)++]), - "MPI_Rput"); - + if (src_type != dst_type) { + dart__mpi__datatype_convert_mpi( + dst_type, nelem, &dst_mpi_type, &dst_num_elem); } else { + dst_mpi_type = src_mpi_type; + dst_num_elem = src_num_elem; + } - DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", src_ptr, nelem); + DART_LOG_TRACE( + "dart_put: MPI_Put (src %p, size %zu, src_type %p, dst_type %p)", + src_ptr, nelem, src_mpi_type, dst_mpi_type); - CHECK_MPI_RET( - MPI_Put(src_ptr, - src_num_elem, - src_mpi_type, - team_unit_id.id, - offset, - dst_num_elem, - dst_mpi_type, - win), - "MPI_Put"); + CHECK_MPI_RET( + dart__mpi__put(src_ptr, + src_num_elem, + src_mpi_type, + team_unit_id.id, + offset, + dst_num_elem, + dst_mpi_type, + win, + reqs, num_reqs), + "MPI_Put"); - } // clean-up strided data types if (dart__mpi__datatype_isstrided(src_type)) { dart__mpi__destroy_strided_mpi(&src_mpi_type); } - if (dart__mpi__datatype_isstrided(dst_type)) { + if (src_type != dst_type && dart__mpi__datatype_isstrided(dst_type)) { dart__mpi__destroy_strided_mpi(&dst_mpi_type); } return DART_OK; From 7f1cd4134acab75ba3d997cb4185c2be5ff07ee3 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 10:16:54 +0900 Subject: [PATCH 17/31] Free handle in dart_get/put_handle if no requests are needed --- dart-impl/mpi/src/dart_communication.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index e97dbb9d1..82d271c7d 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -1176,6 +1176,11 @@ dart_ret_t dart_get_handle( handle->reqs, &handle->num_reqs); } + if (handle->num_reqs == 0) { + free(handle); + handle = DART_HANDLE_NULL; + } + *handleptr = handle; DART_LOG_TRACE("dart_get_handle > handle(%p) dest:%d", @@ -1244,6 +1249,11 @@ dart_ret_t dart_put_handle( &handle->needs_flush); } + if (handle->num_reqs == 0) { + free(handle); + handle = DART_HANDLE_NULL; + } + *handleptr = handle; DART_LOG_TRACE("dart_put_handle > handle(%p) dest:%d", From 11015ee19211cc462ed3d79aad3a71cd88bfe7fe Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 11:02:20 +0900 Subject: [PATCH 18/31] Remove legacy dart_get_strided/indexed_handle --- .../include/dash/dart/if/dart_communication.h | 26 -- dart-impl/mpi/src/dart_communication.c | 323 ------------------ dash/include/dash/halo/HaloMatrixWrapper.h | 127 ++++--- 3 files changed, 86 insertions(+), 390 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_communication.h b/dart-if/include/dash/dart/if/dart_communication.h index 64032306e..6e7098efb 100644 --- a/dart-if/include/dash/dart/if/dart_communication.h +++ b/dart-if/include/dash/dart/if/dart_communication.h @@ -455,32 +455,6 @@ dart_ret_t dart_flush_local_all( */ typedef struct dart_handle_struct * dart_handle_t; -typedef enum { - STRIDED_TO_STRIDED = 0, - STRIDED_TO_CONTIG, - CONTIG_TO_STRIDED -} dart_stride_option; - -dart_ret_t dart_get_strided_handle( - void * dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - size_t stride, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t * handle); - -dart_ret_t dart_get_indexed_handle( - void * dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - int* indexes, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t * handle); - /** * 'HANDLE' variant of dart_get. * Neither local nor remote completion is guaranteed. A later diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 82d271c7d..2749fedae 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -791,329 +791,6 @@ dart_ret_t dart_compare_and_swap( return DART_OK; } -dart_ret_t dart_get_strided_handle( - void* dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - size_t stride, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t* handleptr) -{ - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - dart_team_t teamid = gptr.teamid; - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; - - char* dest_ptr = (char*) dest; - *handleptr = DART_HANDLE_NULL; - - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (dart__unlikely(team_data == NULL)) { - DART_LOG_ERROR("dart_get_strided_handle ! failed: Unknown team %i!", teamid); - return DART_ERR_INVAL; - } - - CHECK_UNITID_RANGE(team_unit_id, team_data); - - dart_segment_info_t *seginfo = dart_segment_get_info( - &(team_data->segdata), seg_id); - if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_get_strided_handle ! " - "Unknown segment %i on team %i", seg_id, teamid); - return DART_ERR_INVAL; - } - - DART_LOG_DEBUG("dart_get_strided_handle: uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", - team_unit_id.id, offset, seg_id, gptr.teamid, nblocks * nelems_block); - DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(*handleptr)); - - /* - * MPI uses offset type int, do not copy more than INT_MAX elements: - */ - if (nelems_block * nblocks > INT_MAX) { - DART_LOG_ERROR("dart_get_strided_handle ! failed: nelem * blocks > INT_MAX"); - return DART_ERR_INVAL; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_strided_handle: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_team_unit_t luid = team_data->sharedmem_tab[gptr.unitid]; - /* - * Use memcpy if the target is in the same node as the calling unit: - */ - DART_LOG_DEBUG("dart_get_strided_handle: shared memory segment, seg_id:%d", seg_id); - char* baseptr = seginfo->baseptr[luid.id] + offset; - - DART_LOG_DEBUG( "dart_get_strided_handle: memcpy %zu bytes", - nblocks * nelems_block * dart__mpi__datatype_sizeof(dtype)); - - size_t size_dtype = dart__mpi__datatype_sizeof(dtype); - size_t size_nelems = nelems_block * size_dtype; - size_t offset_dest = stride * size_dtype; - size_t offset_src = offset_dest; - switch(stride_opt) { - case STRIDED_TO_STRIDED: break; - case STRIDED_TO_CONTIG: offset_dest = size_nelems; break; - case CONTIG_TO_STRIDED: offset_src = size_nelems; break; - defualt: DART_LOG_ERROR("dart_get_indexed_handle ! unknown stride option"); - return DART_ERR_INVAL; - } - - for(size_t i = 0; i < nblocks; ++i, dest_ptr += offset_dest, baseptr += offset_src) - memcpy(dest_ptr, baseptr, size_nelems); - - return DART_OK; - } -#else - DART_LOG_DEBUG("dart_get_strided_handle: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_Rget: - */ - MPI_Datatype mpi_elem_type = dart__mpi__datatype(dtype); - MPI_Datatype mpi_strided_type; - MPI_Type_vector(nblocks, nelems_block, stride, mpi_elem_type, &mpi_strided_type); - MPI_Type_commit(&mpi_strided_type); - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); - handle->dest = team_unit_id.id; - handle->win = win; - handle->needs_flush = false; - - DART_LOG_DEBUG("dart_get_strided_handle: -- MPI_Rget(dest %p, size %zu)", - dest_ptr, nblocks * nelems_block); - - int mpi_ret = -1; - switch(stride_opt) { - case STRIDED_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_strided_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_strided_type, // target data type - win, // window - &handle->reqs[0]); - break; - case STRIDED_TO_CONTIG: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - nblocks * nelems_block, // origin count - mpi_elem_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_strided_type, // target data type - win, // window - &handle->reqs[0]); - break; - case CONTIG_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_strided_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - nblocks * nelems_block, // target count - mpi_elem_type, // target data type - win, // window - &handle->reqs[0]); - break; - default: DART_LOG_ERROR("dart_get_strided_handle ! unknown stride option"); - } - - if (mpi_ret != MPI_SUCCESS) { - free(handle); - MPI_Type_free(&mpi_strided_type); - DART_LOG_ERROR("dart_get_strided_handle ! MPI_Rget failed"); - return DART_ERR_OTHER; - } - - handle->num_reqs++; - *handleptr = handle; - - DART_LOG_TRACE("dart_get_strided_handle > handle(%p) dest:%d win:%"PRIu64, - (void*) handle, handle->dest, (unsigned long) win); - - MPI_Type_free(&mpi_strided_type); - - return DART_OK; -} - -dart_ret_t dart_get_indexed_handle( - void* dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - int* indexes, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t* handleptr) -{ - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - dart_team_t teamid = gptr.teamid; - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; - - char* dest_ptr = (char*) dest; - *handleptr = DART_HANDLE_NULL; - - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (dart__unlikely(team_data == NULL)) { - DART_LOG_ERROR("dart_get_indexed_handle ! failed: Unknown team %i!", teamid); - return DART_ERR_INVAL; - } - - CHECK_UNITID_RANGE(team_unit_id, team_data); - - dart_segment_info_t *seginfo = dart_segment_get_info( - &(team_data->segdata), seg_id); - if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_get_indexed_handle ! " - "Unknown segment %i on team %i", seg_id, teamid); - return DART_ERR_INVAL; - } - - DART_LOG_DEBUG("dart_get_indexed_handle: uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", - team_unit_id.id, offset, seg_id, gptr.teamid, nblocks * nelems_block); - DART_LOG_TRACE("dart_get_indexed_handle: allocated handle:%p", (void *)(*handleptr)); - - /* - * MPI uses offset type int, do not copy more than INT_MAX elements: - */ - if (nelems_block * nblocks > INT_MAX) { - DART_LOG_ERROR("dart_get_indexed_handle ! failed: nelem * blocks > INT_MAX"); - return DART_ERR_INVAL; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_indexed_handle: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_team_unit_t luid = team_data->sharedmem_tab[gptr.unitid]; - /* - * Use memcpy if the target is in the same node as the calling unit: - */ - DART_LOG_DEBUG("dart_get_indexed_handle: shared memory segment, seg_id:%d", seg_id); - char* baseptr = seginfo->baseptr[luid.id] + offset; - - DART_LOG_DEBUG( "dart_get_indexed_handle: memcpy %zu bytes", - nblocks * nelems_block * dart__mpi__datatype_sizeof(dtype)); - - size_t size_dtype = dart__mpi__datatype_sizeof(dtype); - size_t size_nelems = nelems_block * size_dtype; - - if(stride_opt == STRIDED_TO_STRIDED) { - for(size_t i = 0; i < nblocks; ++i) { - size_t size_offset = indexes[i] * size_dtype; - memcpy(dest_ptr + size_offset, baseptr + size_offset, size_nelems); - } - } - else if(stride_opt == STRIDED_TO_CONTIG) { - for(size_t i = 0; i < nblocks; ++i) - memcpy((dest_ptr) + i * size_nelems, baseptr + indexes[i] * size_dtype, size_nelems); - } - else if(stride_opt == CONTIG_TO_STRIDED) { - for(size_t i = 0; i < nblocks; ++i, baseptr += size_nelems) - memcpy(dest_ptr + indexes[i] * size_dtype, baseptr, size_nelems); - } - else { - DART_LOG_ERROR("dart_get_indexed_handle ! unknown stride option"); - return DART_ERR_INVAL; - } - - return DART_OK; - } -#else - DART_LOG_DEBUG("dart_get_indexed_handle: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_Rget: - */ - MPI_Datatype mpi_elem_type = dart__mpi__datatype(dtype); - MPI_Datatype mpi_indexed_type; - MPI_Type_create_indexed_block(nblocks, nelems_block, indexes, mpi_elem_type, &mpi_indexed_type); - MPI_Type_commit(&mpi_indexed_type); - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - - dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); - handle->dest = team_unit_id.id; - handle->win = win; - handle->needs_flush = false; - - DART_LOG_DEBUG("dart_get_indexed_handle: -- MPI_Rget(dest %p, size %zu)", - dest_ptr, nblocks * nelems_block); - - int mpi_ret = 0; - switch(stride_opt) { - case STRIDED_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_indexed_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_indexed_type, // target data type - win, // window - &handle->reqs[0]); - break; - case STRIDED_TO_CONTIG: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - nblocks * nelems_block, // origin count - mpi_elem_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_indexed_type, // target data type - win, // window - &handle->reqs[0]); - break; - case CONTIG_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_indexed_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - nblocks * nelems_block, // target count - mpi_elem_type, // target data type - win, // window - &handle->reqs[0]); - break; - default: DART_LOG_ERROR("dart_get_indexed_handle ! unknown stride option"); - } - - if (mpi_ret != MPI_SUCCESS) { - free(handle); - MPI_Type_free(&mpi_indexed_type); - DART_LOG_ERROR("dart_get_indexed_handle ! MPI_Rget failed"); - return DART_ERR_OTHER; - } - - handle->num_reqs++; - *handleptr = handle; - - DART_LOG_TRACE("dart_get_indexed_handle > handle(%p) dest:%d win:%"PRIu64, - (void*) handle, handle->dest, (unsigned long) win); - - MPI_Type_free(&mpi_indexed_type); - - return DART_OK; -} /* -- Non-blocking dart one-sided operations -- */ dart_ret_t dart_get_handle( diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index a4833c80e..af9a44ffd 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -10,6 +10,7 @@ #include #include +#include namespace dash { @@ -80,24 +81,29 @@ class HaloMatrixWrapper { for(auto i = rel_dim - 1; i < NumDimensions; ++i) num_elems_block *= region.region().extent(i); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_dist = it + num_elems_block; pattern_size_t stride = (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) : 1; auto ds_stride = dart_storage(stride); - HaloData halo_data{ nullptr, std::vector(0) }; + HaloData halo_data; + dart_datatype_t stride_type; + dart_type_create_strided( + ds_num_elems_block.dtype, ds_stride.nelem, + ds_num_elems_block.nelem, &stride_type); + _dart_types.push_back(stride_type); _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block, - ds_stride](HaloData& data) { - dart_get_strided_handle( - off, it.dart_gptr(), num_blocks, - ds_num_elems_block.nelem, ds_stride.nelem, - ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, region_size, ds_num_elems_block, + stride_type](HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, + stride_type, ds_num_elems_block.dtype, + &data.handle); }, std::move(halo_data) })); @@ -105,24 +111,37 @@ class HaloMatrixWrapper { // TODO more optimizations else { num_elems_block *= region.region().extent(NumDimensions - 1); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_tmp = it; - HaloData halo_data{ nullptr, std::vector(num_blocks) }; + HaloData halo_data; auto start_index = it.lpos().index; - for(auto& index : halo_data.indexes) { - index = static_cast( - dart_storage(it_tmp.lpos().index - start_index).nelem); + std::vector block_sizes(num_blocks); + std::vector block_offsets(num_blocks); + std::fill( + block_sizes.begin(), block_sizes.end(), ds_num_elems_block.nelem); + for(auto& index : block_offsets) { + index = + dart_storage(it_tmp.lpos().index - start_index).nelem; it_tmp += num_elems_block; } + dart_datatype_t index_type; + dart_type_create_indexed( + ds_num_elems_block.dtype, + num_blocks, // number of blocks + block_sizes.data(), // size of each block + block_offsets.data(), // offset of first element of each block + &index_type); + _dart_types.push_back(index_type); _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block](HaloData& data) { - dart_get_indexed_handle( - off, it.dart_gptr(), num_blocks, ds_num_elems_block.nelem, - data.indexes.data(), ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, ds_num_elems_block,region_size, index_type] + (HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, index_type, + ds_num_elems_block.dtype, &data.handle); }, std::move(halo_data) })); } @@ -132,48 +151,69 @@ class HaloMatrixWrapper { for(auto i = 0; i < rel_dim; ++i) num_elems_block *= region.region().extent(i); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_dist = it + num_elems_block; pattern_size_t stride = (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) : 1; auto ds_stride = dart_storage(stride); - HaloData halo_data{ nullptr, std::vector(0) }; + HaloData halo_data; + + dart_datatype_t stride_type; + dart_type_create_strided( + ds_num_elems_block.dtype, ds_stride.nelem, + ds_num_elems_block.nelem, &stride_type); + _dart_types.push_back(stride_type); _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block, - ds_stride](HaloData& data) { - dart_get_strided_handle( - off, it.dart_gptr(), num_blocks, - ds_num_elems_block.nelem, ds_stride.nelem, - ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, region_size, ds_num_elems_block, + stride_type](HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, + stride_type, ds_num_elems_block.dtype, + &data.handle); }, std::move(halo_data) })); } // TODO more optimizations else { num_elems_block *= region.region().extent(0); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_tmp = it; - HaloData halo_data{ nullptr, std::vector(num_blocks) }; + HaloData halo_data; + std::vector block_sizes(num_blocks); + std::vector block_offsets(num_blocks); + std::fill( + block_sizes.begin(), block_sizes.end(), ds_num_elems_block.nelem); auto start_index = it.lpos().index; - for(auto& index : halo_data.indexes) { - index = static_cast( - dart_storage(it_tmp.lpos().index - start_index).nelem); + for(auto& index : block_offsets) { + index = + dart_storage(it_tmp.lpos().index - start_index).nelem; it_tmp += num_elems_block; } + + dart_datatype_t index_type; + dart_type_create_indexed( + ds_num_elems_block.dtype, + num_blocks, // number of blocks + block_sizes.data(), // size of each block + block_offsets.data(), // offset of first element of each block + &index_type); + _dart_types.push_back(index_type); + _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block](HaloData& data) { - dart_get_indexed_handle( - off, it.dart_gptr(), num_blocks, ds_num_elems_block.nelem, - data.indexes.data(), ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, index_type, region_size, ds_num_elems_block] + (HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, index_type, + ds_num_elems_block.dtype, &data.handle); }, std::move(halo_data) })); } @@ -183,7 +223,12 @@ class HaloMatrixWrapper { } } - ~HaloMatrixWrapper() {} + ~HaloMatrixWrapper() { + for(auto& dart_type : _dart_types) { + dart_type_destroy(&dart_type); + } + _dart_types.clear(); + } iterator begin() noexcept { return _begin; } @@ -306,8 +351,7 @@ class HaloMatrixWrapper { private: struct HaloData { - dart_handle_t handle; - std::vector indexes; + dart_handle_t handle = DART_HANDLE_NULL; }; struct Data { @@ -338,6 +382,7 @@ class HaloMatrixWrapper { const HaloBlock_t _haloblock; HaloMemory_t _halomemory; std::map _region_data; + std::vector _dart_types; iterator _begin; iterator _end; From c7298f9ed78c9ac93a568bb1c8c80380643ef829 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 12:45:58 +0900 Subject: [PATCH 19/31] Fix type constraint checks in put/get --- dart-impl/mpi/src/dart_communication.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 2749fedae..e13dc1882 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -60,10 +60,12 @@ if ((_num_elem % src_num_elem) != 0 || (_num_elem % dst_num_elem) != 0) { \ char *src_name = dart__mpi__datatype_name(_src_type); \ char *dst_name = dart__mpi__datatype_name(dst_type); \ - DART_LOG_ERROR("%s ! Type-mismatch would lead to truncation (%s vs %s)",\ - __FUNCTION__, src_name, dst_name); \ + DART_LOG_ERROR( \ + "%s ! Type-mismatch would lead to truncation (%s vs %s with %zu elems)",\ + __FUNCTION__, src_name, dst_name, _num_elem); \ free(src_name); \ free(dst_name); \ + return DART_ERR_INVAL; \ } \ } while (0) From 873baf3a68437605e4e391720e88bb5e448326b6 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 12:46:29 +0900 Subject: [PATCH 20/31] Disable old checks using legacy dart_get_strided_handle interface --- dash/test/dart/DARTOnesidedTest.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index 836e0ba08..ba0206a5a 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -181,6 +181,7 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) ASSERT_EQ_U(num_elem_copy, l); } +/* TEST_F(DARTOnesidedTest, GetStridedHandleAllRemote) { if (dash::size() < 2) { @@ -527,3 +528,4 @@ TEST_F(DARTOnesidedTest, GetIndexedHandleAllRemote) dash::Team::All().barrier(); } +*/ From a9945bc730584ccdbb3eb47d664dc3e4d0b0a54b Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 12:46:59 +0900 Subject: [PATCH 21/31] HaloTest: Fix use of delete vs delete[] --- dash/test/halo/HaloTest.cc | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index 0e150380c..7dacae307 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -224,8 +224,8 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) } for(auto i = 0; i < ext_per_dim; ++i) - delete matrix_check[i]; - delete matrix_check; + delete[] matrix_check[i]; + delete[] matrix_check; } matrix_halo.barrier(); @@ -391,10 +391,10 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic3D) for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } matrix_halo.barrier(); @@ -494,10 +494,10 @@ TEST_F(HaloTest, HaloMatrixWrapperCyclic3D) for(auto i = 0; i < ext_per_dim_check; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } dash::Team::All().barrier(); @@ -598,10 +598,10 @@ TEST_F(HaloTest, HaloMatrixWrapperFixed3D) for(auto i = 0; i < ext_per_dim_check; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } @@ -713,10 +713,10 @@ TEST_F(HaloTest, HaloMatrixWrapperMix3D) } for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } matrix_halo.barrier(); @@ -845,10 +845,10 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMix3D) for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } From 0360107841e1aea759a19f9401e32f061f89ff7a Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 12:54:25 +0900 Subject: [PATCH 22/31] Move DART datatype tests to DARTOnesidedTest --- dash/test/dart/DARTDatatypesTest.cc | 350 ----------------- dash/test/dart/DARTDatatypesTest.h | 21 - dash/test/dart/DARTOnesidedTest.cc | 589 ++++++++++++++-------------- 3 files changed, 291 insertions(+), 669 deletions(-) delete mode 100644 dash/test/dart/DARTDatatypesTest.cc delete mode 100644 dash/test/dart/DARTDatatypesTest.h diff --git a/dash/test/dart/DARTDatatypesTest.cc b/dash/test/dart/DARTDatatypesTest.cc deleted file mode 100644 index f317e2993..000000000 --- a/dash/test/dart/DARTDatatypesTest.cc +++ /dev/null @@ -1,350 +0,0 @@ - -#include -#include - -#include "DARTDatatypesTest.h" - -#include -#include -#include - -TEST_F(DARTDatatypesTest, StridedGetSimple) { - constexpr size_t num_elem_per_unit = 120; - constexpr size_t max_stride_size = 5; - - dart_gptr_t gptr; - int *local_ptr; - dart_team_memalloc_aligned( - DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); - gptr.unitid = dash::myid(); - dart_gptr_getaddr(gptr, (void**)&local_ptr); - for (int i = 0; i < num_elem_per_unit; ++i) { - local_ptr[i] = i; - } - - dash::barrier(); - int *buf = new int[num_elem_per_unit]; - - for (int stride = 1; stride <= max_stride_size; stride++) { - - LOG_MESSAGE("Testing GET with stride %i", stride); - - dart_datatype_t new_type; - dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); - - dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); - - // global-to-local strided-to-contig - memset(buf, 0, sizeof(int)*num_elem_per_unit); - gptr.unitid = neighbor; - dart_get_blocking(buf, gptr, num_elem_per_unit / stride, - new_type, DART_TYPE_INT); - - // the first 50 elements should have a value - for (int i = 0; i < num_elem_per_unit / stride; ++i) { - ASSERT_EQ_U(i*stride, buf[i]); - } - - // global-to-local strided-to-contig - memset(buf, 0, sizeof(int)*num_elem_per_unit); - - dart_get_blocking(buf, gptr, num_elem_per_unit / stride, - DART_TYPE_INT, new_type); - - // every other element should have a value - for (int i = 0; i < num_elem_per_unit; ++i) { - if (i%stride == 0) { - ASSERT_EQ_U(i/stride, buf[i]); - } else { - ASSERT_EQ_U(0, buf[i]); - } - } - dart_type_destroy(&new_type); - } - - // clean-up - gptr.unitid = 0; - dart_team_memfree(gptr); - - delete[] buf; - -} - - -TEST_F(DARTDatatypesTest, StridedPutSimple) { - constexpr size_t num_elem_per_unit = 120; - constexpr size_t max_stride_size = 5; - - dart_gptr_t gptr; - int *local_ptr; - dart_team_memalloc_aligned( - DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); - gptr.unitid = dash::myid(); - dart_gptr_getaddr(gptr, (void**)&local_ptr); - memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); - - dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); - - int *buf = new int[num_elem_per_unit]; - for (int i = 0; i < num_elem_per_unit; ++i) { - buf[i] = i; - } - gptr.unitid = neighbor; - - for (int stride = 1; stride <= max_stride_size; stride++) { - - LOG_MESSAGE("Testing PUT with stride %i", stride); - - dash::barrier(); - dart_datatype_t new_type; - dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); - - // local-to-global strided-to-contig - dart_put_blocking(gptr, buf, num_elem_per_unit / stride, - new_type, DART_TYPE_INT); - - dash::barrier(); - - // the first 50 elements should have a value - for (int i = 0; i < num_elem_per_unit / stride; ++i) { - ASSERT_EQ_U(i*stride, local_ptr[i]); - } - - // local-to-global strided-to-contig - memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); - - dart_put_blocking(gptr, buf, num_elem_per_unit / stride, - DART_TYPE_INT, new_type); - - dash::barrier(); - - // every other element should have a value - for (int i = 0; i < num_elem_per_unit; ++i) { - if (i%stride == 0) { - ASSERT_EQ_U(i/stride, local_ptr[i]); - } else { - ASSERT_EQ_U(0, local_ptr[i]); - } - } - - dart_type_destroy(&new_type); - } - - // clean-up - gptr.unitid = 0; - dart_team_memfree(gptr); - - delete[] buf; -} - - -TEST_F(DARTDatatypesTest, BlockedStridedToStrided) { - - constexpr size_t num_elem_per_unit = 120; - constexpr size_t from_stride = 5; - constexpr size_t from_block_size = 2; - constexpr size_t to_stride = 2; - - dart_gptr_t gptr; - int *local_ptr; - dart_team_memalloc_aligned( - DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); - gptr.unitid = dash::myid(); - dart_gptr_getaddr(gptr, (void**)&local_ptr); - for (int i = 0; i < num_elem_per_unit; ++i) { - local_ptr[i] = i; - } - - // global-to-local strided-to-contig - int *buf = new int[num_elem_per_unit]; - memset(buf, 0, sizeof(int)*num_elem_per_unit); - - dart_datatype_t to_type; - dart_type_create_strided(DART_TYPE_INT, to_stride, 1, &to_type); - dart_datatype_t from_type; - dart_type_create_strided(DART_TYPE_INT, from_stride, - from_block_size, &from_type); - - // strided-to-strided get - dart_get_blocking(buf, gptr, num_elem_per_unit / from_stride * from_block_size, - from_type, to_type); - - int value = 0; - for (int i = 0; - i < num_elem_per_unit/from_stride*to_stride*from_block_size; - ++i) { - if (i%to_stride == 0) { - ASSERT_EQ_U(value, buf[i]); - // consider the block size we used as source - // if - if ((value%from_stride) < (from_block_size-1)) { - // expect more elements with incremented value - ++value; - } else { - value += from_stride - (from_block_size - 1); - } - } else { - ASSERT_EQ_U(0, buf[i]); - } - } - - dart_type_destroy(&from_type); - dart_type_destroy(&to_type); - - delete[] buf; - // clean-up - gptr.unitid = 0; - dart_team_memfree(gptr); -} - - -TEST_F(DARTDatatypesTest, IndexedGetSimple) { - - constexpr size_t num_elem_per_unit = 120; - constexpr size_t num_blocks = 5; - - std::vector blocklens(num_blocks); - std::vector offsets(num_blocks); - - // set up offsets and block lengths - size_t num_elems = 0; - for (int i = 0; i < num_blocks; ++i) { - blocklens[i] = (i+1); - offsets[i] = (i*10); - num_elems += blocklens[i]; - } - - dart_gptr_t gptr; - int *local_ptr; - dart_team_memalloc_aligned( - DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); - gptr.unitid = dash::myid(); - dart_gptr_getaddr(gptr, (void**)&local_ptr); - for (int i = 0; i < num_elem_per_unit; ++i) { - local_ptr[i] = i; - } - - dart_datatype_t new_type; - dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), - offsets.data(), &new_type); - - dash::barrier(); - - int *buf = new int[num_elem_per_unit]; - memset(buf, 0, sizeof(int)*num_elem_per_unit); - - // indexed-to-contig - dart_get_blocking(buf, gptr, num_elems, new_type, DART_TYPE_INT); - - size_t idx = 0; - for (size_t i = 0; i < num_blocks; ++i) { - for (size_t j = 0; j < blocklens[i]; ++j) { - ASSERT_EQ_U(local_ptr[offsets[i] + j], buf[idx]); - ++idx; - } - } - - // check we haven't copied more elements than requested - for (size_t i = idx; i < num_elem_per_unit; ++i) { - ASSERT_EQ_U(0, buf[i]); - } - - - // contig-to-indexed - memset(buf, 0, sizeof(int)*num_elem_per_unit); - - dart_get_blocking(buf, gptr, num_elems, DART_TYPE_INT, new_type); - - idx = 0; - for (size_t i = 0; i < num_blocks; ++i) { - for (size_t j = 0; j < blocklens[i]; ++j) { - ASSERT_EQ_U(local_ptr[idx], buf[offsets[i] + j]); - ++idx; - } - } - - dart_type_destroy(&new_type); - - delete[] buf; - // clean-up - gptr.unitid = 0; - dart_team_memfree(gptr); -} - -TEST_F(DARTDatatypesTest, IndexedPutSimple) { - - constexpr size_t num_elem_per_unit = 120; - constexpr size_t num_blocks = 5; - - std::vector blocklens(num_blocks); - std::vector offsets(num_blocks); - - // set up offsets and block lengths - size_t num_elems = 0; - for (int i = 0; i < num_blocks; ++i) { - blocklens[i] = (i+1); - offsets[i] = (i*10); - num_elems += blocklens[i]; - } - - dart_gptr_t gptr; - int *local_ptr; - dart_team_memalloc_aligned( - DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); - gptr.unitid = dash::myid(); - dart_gptr_getaddr(gptr, (void**)&local_ptr); - - dart_datatype_t new_type; - dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), - offsets.data(), &new_type); - - dash::barrier(); - - int *buf = new int[num_elem_per_unit]; - for (int i = 0; i < num_elem_per_unit; ++i) { - buf[i] = i; - } - - memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); - - // indexed-to-contig - dart_put_blocking(gptr, buf, num_elems, new_type, DART_TYPE_INT); - - dash::barrier(); - - size_t idx = 0; - for (size_t i = 0; i < num_blocks; ++i) { - for (size_t j = 0; j < blocklens[i]; ++j) { - ASSERT_EQ_U(buf[offsets[i] + j], local_ptr[idx]); - ++idx; - } - } - - // check we haven't copied more elements than requested - for (size_t i = idx; i < num_elem_per_unit; ++i) { - ASSERT_EQ_U(0, local_ptr[i]); - } - - // contig-to-indexed - memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); - - dash::barrier(); - - dart_put_blocking(gptr, buf, num_elems, DART_TYPE_INT, new_type); - dash::barrier(); - - idx = 0; - for (size_t i = 0; i < num_blocks; ++i) { - for (size_t j = 0; j < blocklens[i]; ++j) { - ASSERT_EQ_U(buf[idx], local_ptr[offsets[i] + j]); - ++idx; - } - } - - dart_type_destroy(&new_type); - - delete[] buf; - // clean-up - gptr.unitid = 0; - dart_team_memfree(gptr); -} diff --git a/dash/test/dart/DARTDatatypesTest.h b/dash/test/dart/DARTDatatypesTest.h deleted file mode 100644 index 01388c635..000000000 --- a/dash/test/dart/DARTDatatypesTest.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef DASH__TEST__DART_TYPES_TEST_H_ -#define DASH__TEST__DART_TYPES_TEST_H_ - -#include "../TestBase.h" - -/** - * Test fixture for onesided operations provided by DART. - */ -class DARTDatatypesTest : public dash::test::TestBase { -protected: - size_t _dash_id = 0; - size_t _dash_size = 0; - - virtual void SetUp() { - dash::test::TestBase::SetUp(); - _dash_id = dash::myid(); - _dash_size = dash::size(); - } -}; - -#endif // DASH__TEST__DART_TYPES_TEST_H_ diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index ba0206a5a..f866510de 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -181,351 +181,344 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) ASSERT_EQ_U(num_elem_copy, l); } -/* -TEST_F(DARTOnesidedTest, GetStridedHandleAllRemote) -{ - if (dash::size() < 2) { - return; + +TEST_F(DARTOnesidedTest, StridedGetSimple) { + constexpr size_t num_elem_per_unit = 120; + constexpr size_t max_stride_size = 5; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; } - using value_t = int; - const size_t block_size = 50; - size_t num_elem_total = dash::size() * block_size; + dash::barrier(); + int *buf = new int[num_elem_per_unit]; - dash::Array array(num_elem_total, dash::BLOCKED); - // Array to store local copy: - std::vector local_array(num_elem_total); - // Array of handles, one for each dart_get_handle: - std::vector handles; - handles.reserve(dash::size()); - // Assign initial values: [ 1000, 1001, 1002, ... 2000, 2001, ... ] - for (size_t i = 0; i < block_size; ++i) - array.local[i] = ((dash::myid() + 1) * 1000) + i; + for (int stride = 1; stride <= max_stride_size; stride++) { - array.barrier(); + LOG_MESSAGE("Testing GET with stride %i", stride); - LOG_MESSAGE("Requesting remote blocks"); - // Copy values from all blocks in strides: - size_t stride = 5; - size_t nblocks = block_size / stride; - size_t nelems_block = 3; + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); - LOG_MESSAGE("DART stride: stride:%d nblocks:%d", stride, nblocks, nelems_block); + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); - LOG_MESSAGE("STRIDE_TO_STRIDE"); + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + gptr.unitid = neighbor; + dart_get_blocking(buf, gptr, num_elem_per_unit / stride, + new_type, DART_TYPE_INT); - for (size_t u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_strided_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, stride, - dash::dart_datatype::value, - STRIDED_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); - } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); + // the first 50 elements should have a value + for (int i = 0; i < num_elem_per_unit / stride; ++i) { + ASSERT_EQ_U(i*stride, buf[i]); + } - LOG_MESSAGE("Validating values"); - for (size_t g = 0; g < array.size(); ++g) { - if(g % stride < nelems_block) - ASSERT_EQ_U((value_t)array[g], local_array[g]); - else - ASSERT_EQ_U(0, local_array[g]); + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_get_blocking(buf, gptr, num_elem_per_unit / stride, + DART_TYPE_INT, new_type); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%stride == 0) { + ASSERT_EQ_U(i/stride, buf[i]); + } else { + ASSERT_EQ_U(0, buf[i]); + } + } + dart_type_destroy(&new_type); } - dash::Team::All().barrier(); - - LOG_MESSAGE("CONTIG_TO_STRIDE"); - - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); - - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_strided_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, stride, - dash::dart_datatype::value, - CONTIG_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); + + delete[] buf; + +} + + +TEST_F(DARTOnesidedTest, StridedPutSimple) { + constexpr size_t num_elem_per_unit = 120; + constexpr size_t max_stride_size = 5; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + + int *buf = new int[num_elem_per_unit]; + for (int i = 0; i < num_elem_per_unit; ++i) { + buf[i] = i; } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); + gptr.unitid = neighbor; - LOG_MESSAGE("Validating values"); - for (auto g = 0, l = 0; l < local_array.size(); ++l) { - if(l % block_size == 0) - g = l; - if(l % stride < nelems_block) { - ASSERT_EQ_U((value_t)array[g], local_array[l]); - ++g; + for (int stride = 1; stride <= max_stride_size; stride++) { + + LOG_MESSAGE("Testing PUT with stride %i", stride); + + dash::barrier(); + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); + + // local-to-global strided-to-contig + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, + new_type, DART_TYPE_INT); + + dash::barrier(); + + // the first 50 elements should have a value + for (int i = 0; i < num_elem_per_unit / stride; ++i) { + ASSERT_EQ_U(i*stride, local_ptr[i]); } - else - ASSERT_EQ_U(0, local_array[l]); + // local-to-global strided-to-contig + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, + DART_TYPE_INT, new_type); + + dash::barrier(); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%stride == 0) { + ASSERT_EQ_U(i/stride, local_ptr[i]); + } else { + ASSERT_EQ_U(0, local_ptr[i]); + } + } + + dart_type_destroy(&new_type); } - dash::Team::All().barrier(); - - LOG_MESSAGE("STRIDE_TO_CONTIG"); - - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); - - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_strided_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, stride, - dash::dart_datatype::value, - STRIDED_TO_CONTIG, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); + + delete[] buf; +} + + +TEST_F(DARTOnesidedTest, BlockedStridedToStrided) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t from_stride = 5; + constexpr size_t from_block_size = 2; + constexpr size_t to_stride = 2; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - auto nelems_cont = nblocks * nelems_block; - for (auto g = 0, l = 0; g < array.size(); ++g) { - auto test_new_block = g % block_size; - if(test_new_block == 0) - l = g; - - if(test_new_block < nelems_cont) { - ASSERT_EQ_U((value_t)array[l], local_array[g]); - auto test_index = l % stride; - if(test_index < nelems_block - 1) - ++l; - else - l += stride - test_index; + // global-to-local strided-to-contig + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_datatype_t to_type; + dart_type_create_strided(DART_TYPE_INT, to_stride, 1, &to_type); + dart_datatype_t from_type; + dart_type_create_strided(DART_TYPE_INT, from_stride, + from_block_size, &from_type); + + // strided-to-strided get + dart_get_blocking(buf, gptr, num_elem_per_unit / from_stride * from_block_size, + from_type, to_type); + + int value = 0; + for (int i = 0; + i < num_elem_per_unit/from_stride*to_stride*from_block_size; + ++i) { + if (i%to_stride == 0) { + ASSERT_EQ_U(value, buf[i]); + // consider the block size we used as source + // if + if ((value%from_stride) < (from_block_size-1)) { + // expect more elements with incremented value + ++value; + } else { + value += from_stride - (from_block_size - 1); + } + } else { + ASSERT_EQ_U(0, buf[i]); } - else - ASSERT_EQ_U(0, local_array[g]); } - dash::Team::All().barrier(); + dart_type_destroy(&from_type); + dart_type_destroy(&to_type); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); } -TEST_F(DARTOnesidedTest, GetIndexedHandleAllRemote) -{ - if (dash::size() < 2) { - return; + +TEST_F(DARTOnesidedTest, IndexedGetSimple) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks = 5; + + std::vector blocklens(num_blocks); + std::vector offsets(num_blocks); + + // set up offsets and block lengths + size_t num_elems = 0; + for (int i = 0; i < num_blocks; ++i) { + blocklens[i] = (i+1); + offsets[i] = (i*10); + num_elems += blocklens[i]; } - using value_t = int; - const size_t block_size = 50; - size_t num_elem_total = dash::size() * block_size; + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } - dash::Array array(num_elem_total, dash::BLOCKED); - // Array to store local copy: - std::vector local_array(num_elem_total); - // Array of handles, one for each dart_get_handle: - std::vector handles; - handles.reserve(dash::size()); - // Assign initial values: [ 1000, 1001, 1002, ... 2000, 2001, ... ] - for (size_t i = 0; i < block_size; ++i) - array.local[i] = ((dash::myid() + 1) * 1000) + i; + dart_datatype_t new_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), + offsets.data(), &new_type); - array.barrier(); + dash::barrier(); - LOG_MESSAGE("Requesting remote blocks"); + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); - constexpr size_t nblocks = 7; - std::array indexes{0,5,10,20,25,40,45}; - size_t nelems_block = 3; + // indexed-to-contig + dart_get_blocking(buf, gptr, num_elems, new_type, DART_TYPE_INT); - LOG_MESSAGE("STRIDE_TO_STRIDE"); + size_t idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(local_ptr[offsets[i] + j], buf[idx]); + ++idx; + } + } - for (size_t u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_indexed_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, indexes.data(), - dash::dart_datatype::value, - STRIDED_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + // check we haven't copied more elements than requested + for (size_t i = idx; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(0, buf[i]); } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - auto index = indexes.begin(); - size_t next_block = 0; - for (size_t g = 0; g < array.size(); ++g) { - if(g % block_size == 0 && g != 0) { - next_block += block_size; - index = indexes.begin(); - } - auto index_tmp = next_block + *index; - if(g >= index_tmp && g < index_tmp + nelems_block) { - ASSERT_EQ_U((value_t)array[g], local_array[g]); - if(g == index_tmp + nelems_block - 1) - ++index; + // contig-to-indexed + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_get_blocking(buf, gptr, num_elems, DART_TYPE_INT, new_type); + + idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(local_ptr[idx], buf[offsets[i] + j]); + ++idx; } - else - ASSERT_EQ_U(0, local_array[g]); + } + dart_type_destroy(&new_type); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); +} + +TEST_F(DARTOnesidedTest, IndexedPutSimple) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks = 5; + + std::vector blocklens(num_blocks); + std::vector offsets(num_blocks); + + // set up offsets and block lengths + size_t num_elems = 0; + for (int i = 0; i < num_blocks; ++i) { + blocklens[i] = (i+1); + offsets[i] = (i*10); + num_elems += blocklens[i]; } - dash::Team::All().barrier(); - - LOG_MESSAGE("CONTIG_TO_STRIDE"); - - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); - - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_indexed_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, indexes.data(), - dash::dart_datatype::value, - CONTIG_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + + dart_datatype_t new_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), + offsets.data(), &new_type); + + dash::barrier(); + + int *buf = new int[num_elem_per_unit]; + for (int i = 0; i < num_elem_per_unit; ++i) { + buf[i] = i; } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - index = indexes.begin(); - next_block = 0; - - for (auto g = 0, l = 0; l < local_array.size(); ++l) { - if(l % block_size == 0 && l != 0) { - next_block += block_size; - index = indexes.begin(); - g=l; - } - auto index_tmp = next_block + *index; - if(l >= index_tmp && l < index_tmp + nelems_block) { - ASSERT_EQ_U((value_t)array[g], local_array[l]); - if(l == index_tmp + nelems_block - 1) - ++index; - ++g; - } - else - ASSERT_EQ_U(0, local_array[l]); + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + // indexed-to-contig + dart_put_blocking(gptr, buf, num_elems, new_type, DART_TYPE_INT); + dash::barrier(); + + size_t idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(buf[offsets[i] + j], local_ptr[idx]); + ++idx; + } } - dash::Team::All().barrier(); - - LOG_MESSAGE("STRIDE_TO_CONTIG"); - - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); - - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_indexed_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, indexes.data(), - dash::dart_datatype::value, - STRIDED_TO_CONTIG, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + // check we haven't copied more elements than requested + for (size_t i = idx; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(0, local_ptr[i]); } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - auto nelems_cont = nblocks * nelems_block; - for (auto g = 0, l = 0; g < array.size(); ++g) { - auto test_new_block = g % block_size; - if(test_new_block == 0 && g != 0) { - next_block += block_size; - index = indexes.begin(); - l=g; - } - auto index_tmp = next_block + *index; - if(test_new_block < nelems_cont) { - if(g >= index_tmp && g < index_tmp + nelems_block) { - ASSERT_EQ_U((value_t)array[l], local_array[g]); - if(g == index_tmp + nelems_block - 1) - ++index; - ++l; - } - else - l += index_tmp; - } - else - ASSERT_EQ_U(0, local_array[g]); + // contig-to-indexed + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + dash::barrier(); + + dart_put_blocking(gptr, buf, num_elems, DART_TYPE_INT, new_type); + dash::barrier(); + + idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(buf[idx], local_ptr[offsets[i] + j]); + ++idx; + } } - dash::Team::All().barrier(); + dart_type_destroy(&new_type); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); } -*/ From 6eeae2e95f72fc3896eaf7838881f04f27f4ad9f Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 14:16:04 +0900 Subject: [PATCH 23/31] Add indexed-to-indexed test --- dash/test/dart/DARTOnesidedTest.cc | 97 ++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index f866510de..db08674d5 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -522,3 +522,100 @@ TEST_F(DARTOnesidedTest, IndexedPutSimple) { gptr.unitid = 0; dart_team_memfree(gptr); } + + +TEST_F(DARTOnesidedTest, IndexedToIndexedGet) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks_to = 10; + constexpr size_t num_blocks_from = 5; + + std::vector blocklens_to(num_blocks_to); + std::vector offsets_to(num_blocks_to); + std::vector blocklens_from(num_blocks_from); + std::vector offsets_from(num_blocks_from); + + // set up offsets and block lengths + size_t num_elems_to = 0; + for (int i = 0; i < num_blocks_to; ++i) { + blocklens_to[i] = (i+1); + offsets_to[i] = (i*5); + num_elems_to += blocklens_to[i]; + } + + size_t num_elems_from = 0; + for (int i = 0; i < num_blocks_from; ++i) { + blocklens_from[i] = (i+1)+8; + offsets_from[i] = (i*10); + num_elems_from += blocklens_from[i]; + } + + ASSERT_EQ_U(num_elems_from, num_elems_to); + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } + + dart_datatype_t to_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks_to, + blocklens_to.data(), + offsets_to.data(), &to_type); + + dart_datatype_t from_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks_from, + blocklens_from.data(), + offsets_from.data(), &from_type); + + dash::barrier(); + + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + int *index_map_to = new int[num_elem_per_unit]; + memset(index_map_to, 0, sizeof(int)*num_elem_per_unit); + + int *index_map_from = new int[num_elem_per_unit]; + memset(index_map_from, 0, sizeof(int)*num_elem_per_unit); + + // populate the flat list of indices to copy from + size_t idx = 0; + for (size_t i = 0; i < num_blocks_from; ++i) { + for (size_t j = 0; j < blocklens_from[i]; ++j) { + index_map_from[idx] = offsets_from[i] + j; + ++idx; + } + } + + // populate the mapping from target indices to values + idx = 0; + for (size_t i = 0; i < num_blocks_to; ++i) { + for (size_t j = 0; j < blocklens_to[i]; ++j) { + index_map_to[offsets_to[i] + j] = index_map_from[idx]; + ++idx; + } + } + + // indexed-to-indexed + dart_get_blocking(buf, gptr, num_elems_to, from_type, to_type); + + for (size_t i = 0; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(local_ptr[index_map_to[i]], buf[i]); + } + + dart_type_destroy(&from_type); + dart_type_destroy(&to_type); + + delete[] buf; + delete[] index_map_to; + delete[] index_map_from; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); +} + From 8ca5705d7e8f597de352aa70188e016eb19b707d Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 14:25:14 +0900 Subject: [PATCH 24/31] Check for basic type in global memory allocation --- .../dash/dart/mpi/dart_communication_priv.h | 18 ++++++++++++++++++ dart-impl/mpi/src/dart_communication.c | 10 ---------- dart-impl/mpi/src/dart_globmem.c | 7 ++++--- dart-impl/mpi/src/dart_mpi_types.c | 1 - 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h index 34877d726..b3e032b74 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h @@ -193,4 +193,22 @@ dart__mpi__datatype_convert_mpi( char* dart__mpi__datatype_name(dart_datatype_t dart_type) DART_INTERNAL; +/** + * Helper macro that checks whether the given type is a basic type + * and errors out in case of an error. + */ + +#define CHECK_IS_BASICTYPE(_dtype) \ + do { \ + if (dart__unlikely(!dart__mpi__datatype_isbasic(_dtype))) { \ + char *name = dart__mpi__datatype_name(_dtype); \ + DART_LOG_ERROR( \ + "%s ! Only basic types allowed in this operation (%s given)",\ + __FUNCTION__, name); \ + free(name); \ + return DART_ERR_INVAL; \ + } \ + } while (0) + + #endif /* DART_ADAPT_COMMUNICATION_PRIV_H_INCLUDED */ diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index e13dc1882..349d85035 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -73,16 +73,6 @@ CHECK_EQUAL_BASETYPE(_src_type, _dst_type); \ CHECK_NUM_ELEM(_src_type, _dst_type, _num_elem); -#define CHECK_IS_BASICTYPE(_dtype) \ - do { \ - if (dart__unlikely(!dart__mpi__datatype_isbasic(_dtype))) { \ - char *name = dart__mpi__datatype_name(_dtype); \ - DART_LOG_ERROR("dart_get ! Cannot fetch-op non-basic type (%s)", name); \ - free(name); \ - return DART_ERR_INVAL; \ - } \ - } while (0) - /** * Temporary space allocation: * - on the stack for allocations <=64B diff --git a/dart-impl/mpi/src/dart_globmem.c b/dart-impl/mpi/src/dart_globmem.c index f9d95daac..1553b4a9b 100644 --- a/dart-impl/mpi/src/dart_globmem.c +++ b/dart-impl/mpi/src/dart_globmem.c @@ -368,7 +368,7 @@ dart_team_memalloc_aligned_dynamic( "baseptr:%p segid:%i across team %d", nbytes, gptr_unitid, sub_mem, segment->segid, teamid); - return DART_OK; + return DART_OK; } static dart_ret_t @@ -447,6 +447,7 @@ dart_team_memalloc_aligned( dart_datatype_t dtype, dart_gptr_t * gptr) { + CHECK_IS_BASICTYPE(dtype); #ifdef DART_MPI_ENABLE_DYNAMIC_WINDOWS return dart_team_memalloc_aligned_dynamic(teamid, nelem, dtype, gptr); #else @@ -548,6 +549,7 @@ dart_team_memregister_aligned( void * addr, dart_gptr_t * gptr) { + CHECK_IS_BASICTYPE(dtype); size_t size; int dtype_size = dart__mpi__datatype_sizeof(dtype); size_t nbytes = nelem * dtype_size; @@ -615,6 +617,7 @@ dart_team_memregister( void * addr, dart_gptr_t * gptr) { + CHECK_IS_BASICTYPE(dtype); int nil; size_t size; int dtype_size = dart__mpi__datatype_sizeof(dtype); @@ -722,5 +725,3 @@ dart_team_memderegister( unitid.id, gptr.addr_or_offs.offset, gptr.unitid, teamid); return DART_OK; } - - diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c index fffa44554..c7f22a1b8 100644 --- a/dart-impl/mpi/src/dart_mpi_types.c +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -187,7 +187,6 @@ dart__mpi__create_strided_mpi( dart__mpi__datatype_struct(dts->base_type)->basic.mpi_type, &new_mpi_dtype); MPI_Type_commit(&new_mpi_dtype); - printf("Created new strided MPI type %p (%d, %d, %d)\n", new_mpi_dtype, num_blocks, dts->num_elem, dts->strided.stride); return new_mpi_dtype; } From 617334da25c4e06ea6af29be2292143c529cb912 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 15:26:34 +0900 Subject: [PATCH 25/31] Add documentation for second type parameter --- .../include/dash/dart/if/dart_communication.h | 83 ++++++++++--------- dart-if/include/dash/dart/if/dart_types.h | 4 + 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/dart-if/include/dash/dart/if/dart_communication.h b/dart-if/include/dash/dart/if/dart_communication.h index 6e7098efb..3987ae083 100644 --- a/dart-if/include/dash/dart/if/dart_communication.h +++ b/dart-if/include/dash/dart/if/dart_communication.h @@ -329,10 +329,13 @@ dart_ret_t dart_compare_and_swap( * is guaranteed. A later flush operation is needed to guarantee * local and remote completion. * - * \param dest The local destination buffer to store the data to. - * \param gptr A global pointer determining the source of the get operation. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param dest The local destination buffer to store the data to. + * \param gptr A global pointer determining the source of the get operation. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values at the source. + * \param dst_type The data type of the values in buffer \c dest. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -353,10 +356,13 @@ dart_ret_t dart_get( * is guaranteed. A later flush operation is needed to guarantee * local and remote completion. * - * \param gptr A global pointer determining the target of the put operation. - * \param src The local source buffer to load the data from. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c src. + * \param gptr A global pointer determining the target of the put operation. + * \param src The local source buffer to load the data from. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values in buffer \c src. + * \param dst_type The data type of the values at the target. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -455,23 +461,6 @@ dart_ret_t dart_flush_local_all( */ typedef struct dart_handle_struct * dart_handle_t; -/** - * 'HANDLE' variant of dart_get. - * Neither local nor remote completion is guaranteed. A later - * dart_wait*() call or a fence/flush operation is needed to guarantee - * completion. - * - * \param dest Local target memory to store the data. - * \param gptr Global pointer being the source of the data transfer. - * \param nelem The number of elements of \c dtype in buffer \c dest. - * \param dtype The data type of the values in buffer \c dest. - * \param[out] handle Pointer to DART handle to instantiate for later use with \c dart_wait, \c dart_wait_all etc. - * - * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. - * - * \threadsafe - * \ingroup DartCommunication - */ #define DART_HANDLE_NULL (dart_handle_t)NULL /** @@ -480,12 +469,15 @@ typedef struct dart_handle_struct * dart_handle_t; * dart_wait*() call or a fence/flush operation is needed to guarantee * completion. * - * \param dest Local target memory to store the data. - * \param gptr Global pointer being the source of the data transfer. - * \param nelem The number of elements of \c dtype in buffer \c dest. - * \param dtype The data type of the values in buffer \c dest. + * \param dest Local target memory to store the data. + * \param gptr Global pointer being the source of the data transfer. + * \param nelem The number of elements of \c dtype in buffer \c dest. + * \param src_type The data type of the values at the source. + * \param dst_type The data type of the values in buffer \c dest. * \param[out] handle Pointer to DART handle to instantiate for later use with \c dart_wait, \c dart_wait_all etc. * + * \note Base-type conversion is not performed. + * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * * \threadsafe @@ -505,12 +497,15 @@ dart_ret_t dart_get_handle( * dart_wait*() call or a fence/flush operation is needed to guarantee * completion. * - * \param gptr Global pointer being the target of the data transfer. - * \param src Local source memory to transfer data from. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param gptr Global pointer being the target of the data transfer. + * \param src Local source memory to transfer data from. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values in buffer \c src. + * \param dst_type The data type of the values at the target. * \param[out] handle Pointer to DART handle to instantiate for later use with \c dart_wait, \c dart_wait_all etc. * + * \note Base-type conversion is not performed. + * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * * \threadsafe @@ -635,10 +630,13 @@ dart_ret_t dart_testall_local( * 'BLOCKING' variant of dart_get. * Both local and remote completion is guaranteed. * - * \param dest Local target memory to store the data. - * \param gptr Global pointer being the source of the data transfer. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param dest Local target memory to store the data. + * \param gptr Global pointer being the source of the data transfer. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values at the source. + * \param dst_type The data type of the values in buffer \c dest. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -656,10 +654,13 @@ dart_ret_t dart_get_blocking( * 'BLOCKING' variant of dart_put. * Both local and remote completion is guaranteed. * - * \param gptr Global pointer being the target of the data transfer. - * \param src Local source memory to transfer data from. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param gptr Global pointer being the target of the data transfer. + * \param src Local source memory to transfer data from. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values in buffer \c src. + * \param dst_type The data type of the values at the target. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * diff --git a/dart-if/include/dash/dart/if/dart_types.h b/dart-if/include/dash/dart/if/dart_types.h index 170115de9..c380d18ce 100644 --- a/dart-if/include/dash/dart/if/dart_types.h +++ b/dart-if/include/dash/dart/if/dart_types.h @@ -740,6 +740,10 @@ dart_type_create_indexed( * Destroy a data type that was previously created using * \ref dart_type_create_strided or \ref dart_type_create_indexed. * + * Data types can be destroyed before pending operations using that type have + * completed. However, after destruction a type may not be used to start + * new operations. + * * \param dart_type The type to be destroyed. * * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. From 0cf56432a94999d4e56778990e2685d807cb34e6 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 15:28:49 +0900 Subject: [PATCH 26/31] Fix datatype tests by adding a barrier before segment destruction --- dash/test/dart/DARTOnesidedTest.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index db08674d5..bdbf1f0f0 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -199,6 +199,9 @@ TEST_F(DARTOnesidedTest, StridedGetSimple) { dash::barrier(); int *buf = new int[num_elem_per_unit]; + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + gptr.unitid = neighbor; + for (int stride = 1; stride <= max_stride_size; stride++) { LOG_MESSAGE("Testing GET with stride %i", stride); @@ -206,15 +209,12 @@ TEST_F(DARTOnesidedTest, StridedGetSimple) { dart_datatype_t new_type; dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); - dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); - // global-to-local strided-to-contig memset(buf, 0, sizeof(int)*num_elem_per_unit); - gptr.unitid = neighbor; dart_get_blocking(buf, gptr, num_elem_per_unit / stride, new_type, DART_TYPE_INT); - // the first 50 elements should have a value + // the first elements should have a value for (int i = 0; i < num_elem_per_unit / stride; ++i) { ASSERT_EQ_U(i*stride, buf[i]); } @@ -236,6 +236,8 @@ TEST_F(DARTOnesidedTest, StridedGetSimple) { dart_type_destroy(&new_type); } + dash::barrier(); + // clean-up gptr.unitid = 0; dart_team_memfree(gptr); From fc63e2ac95ecdc5b953b5b9bea47f48a50935870 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 16:13:45 +0900 Subject: [PATCH 27/31] More barriers for tests --- dash/test/dart/DARTOnesidedTest.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index bdbf1f0f0..8abd278a2 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -257,20 +257,21 @@ TEST_F(DARTOnesidedTest, StridedPutSimple) { DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); gptr.unitid = dash::myid(); dart_gptr_getaddr(gptr, (void**)&local_ptr); - memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + gptr.unitid = neighbor; int *buf = new int[num_elem_per_unit]; for (int i = 0; i < num_elem_per_unit; ++i) { buf[i] = i; } - gptr.unitid = neighbor; for (int stride = 1; stride <= max_stride_size; stride++) { LOG_MESSAGE("Testing PUT with stride %i", stride); + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + dash::barrier(); dart_datatype_t new_type; dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); @@ -279,9 +280,7 @@ TEST_F(DARTOnesidedTest, StridedPutSimple) { dart_put_blocking(gptr, buf, num_elem_per_unit / stride, new_type, DART_TYPE_INT); - dash::barrier(); - - // the first 50 elements should have a value + // the first elements should have a value for (int i = 0; i < num_elem_per_unit / stride; ++i) { ASSERT_EQ_U(i*stride, local_ptr[i]); } @@ -289,6 +288,8 @@ TEST_F(DARTOnesidedTest, StridedPutSimple) { // local-to-global strided-to-contig memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + dash::barrier(); + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, DART_TYPE_INT, new_type); @@ -367,6 +368,8 @@ TEST_F(DARTOnesidedTest, BlockedStridedToStrided) { dart_type_destroy(&from_type); dart_type_destroy(&to_type); + dash::barrier(); + delete[] buf; // clean-up gptr.unitid = 0; @@ -441,6 +444,8 @@ TEST_F(DARTOnesidedTest, IndexedGetSimple) { dart_type_destroy(&new_type); + dash::barrier(); + delete[] buf; // clean-up gptr.unitid = 0; @@ -483,6 +488,8 @@ TEST_F(DARTOnesidedTest, IndexedPutSimple) { memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + dash::barrier(); + // indexed-to-contig dart_put_blocking(gptr, buf, num_elems, new_type, DART_TYPE_INT); @@ -613,6 +620,8 @@ TEST_F(DARTOnesidedTest, IndexedToIndexedGet) { dart_type_destroy(&from_type); dart_type_destroy(&to_type); + dash::barrier(); + delete[] buf; delete[] index_map_to; delete[] index_map_from; From 39d36800a495287c28921131199daf8d09f53496 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 20 Nov 2017 17:03:51 +0900 Subject: [PATCH 28/31] Another missing barrier --- dash/test/dart/DARTOnesidedTest.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index 8abd278a2..7d068dc86 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -272,27 +272,26 @@ TEST_F(DARTOnesidedTest, StridedPutSimple) { memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); - dash::barrier(); dart_datatype_t new_type; dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); + dash::barrier(); // local-to-global strided-to-contig dart_put_blocking(gptr, buf, num_elem_per_unit / stride, new_type, DART_TYPE_INT); + dash::barrier(); // the first elements should have a value for (int i = 0; i < num_elem_per_unit / stride; ++i) { ASSERT_EQ_U(i*stride, local_ptr[i]); } - // local-to-global strided-to-contig + // local-to-global contig-to-strided memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); dash::barrier(); - dart_put_blocking(gptr, buf, num_elem_per_unit / stride, DART_TYPE_INT, new_type); - dash::barrier(); // every other element should have a value @@ -331,6 +330,7 @@ TEST_F(DARTOnesidedTest, BlockedStridedToStrided) { for (int i = 0; i < num_elem_per_unit; ++i) { local_ptr[i] = i; } + dash::barrier(); // global-to-local strided-to-contig int *buf = new int[num_elem_per_unit]; From 3f44c8d0f13fcb7ed1ac19f64da7a18f75721264 Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Wed, 22 Nov 2017 08:18:38 +0100 Subject: [PATCH 29/31] disabled nastyMPI --- circle.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/circle.yml b/circle.yml index 7b9d63512..25fff329f 100644 --- a/circle.yml +++ b/circle.yml @@ -21,8 +21,8 @@ test: parallel: true - bash ./dash/scripts/circleci/run-docker.sh Minimal gnu: parallel: true - - bash ./dash/scripts/circleci/run-docker.sh Nasty gnu: - parallel: true +# - bash ./dash/scripts/circleci/run-docker.sh Nasty gnu: +# parallel: true - bash ./dash/scripts/circleci/run-docker.sh Release clang: parallel: true - grep "FAIL" ./dash-ci.log && (echo "Full log:" ; cat ./dash-ci.log ; exit 1) || exit 0: From 8dbbf1eb03e7b39d0f5187b28093a35b82430ba6 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 22 Nov 2017 17:10:36 +0900 Subject: [PATCH 30/31] Reduce size of matrix in Halo tests --- dash/test/halo/HaloTest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dash/test/halo/HaloTest.h b/dash/test/halo/HaloTest.h index 50ae13859..350a5cda1 100644 --- a/dash/test/halo/HaloTest.h +++ b/dash/test/halo/HaloTest.h @@ -12,7 +12,7 @@ class HaloTest : public dash::test::TestBase { virtual ~HaloTest() { } - static constexpr long ext_per_dim = 150; + static constexpr long ext_per_dim = 100; }; From a09a0434fa2ddf6cec35fc5448d4336e434bf314 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 22 Nov 2017 17:44:11 +0900 Subject: [PATCH 31/31] Fix build error with clang --- dash/include/dash/Onesided.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dash/include/dash/Onesided.h b/dash/include/dash/Onesided.h index 85b6c8c76..c6fc53925 100644 --- a/dash/include/dash/Onesided.h +++ b/dash/include/dash/Onesided.h @@ -176,7 +176,7 @@ void fence_local( * \nonblocking */ template -constexpr +inline void put_value_async( /// [IN] Value to set const T & newval, @@ -193,7 +193,7 @@ void put_value_async( * \nonblocking */ template -constexpr +inline void get_value_async( /// [OUT] Local pointer that will contain the value of the /// global address @@ -210,7 +210,7 @@ void get_value_async( * \blocking */ template -constexpr +inline void put_value( /// [IN] Value to set const T & newval, @@ -226,7 +226,7 @@ void put_value( * \blocking */ template -constexpr +inline void get_value( /// [OUT] Local pointer that will contain the value of the /// global address