55 * Copyright (c) Amazon.com, Inc. or its affiliates.
66 * All Rights reserved.
77 * Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+ * Copyright (c) 2024 The University of Tennessee and The University
9+ * of Tennessee Research Foundation. All rights
10+ * reserved.
811 *
912 * $COPYRIGHT$
1013 *
@@ -184,6 +187,19 @@ typedef int (*opal_accelerator_base_module_check_addr_fn_t)(
184187typedef int (* opal_accelerator_base_module_create_stream_fn_t )(
185188 int dev_id , opal_accelerator_stream_t * * stream );
186189
190+
191+ /**
192+ * Query the default stream.
193+ *
194+ * @param[IN] dev_id Associated device for the stream or
195+ * MCA_ACCELERATOR_NO_DEVICE_ID
196+ * @param[OUT] stream Set to the default stream.
197+ *
198+ * @return OPAL_SUCCESS or error status on failure
199+ */
200+ typedef int (* opal_accelerator_base_get_default_stream_fn_t )(
201+ int dev_id , opal_accelerator_stream_t * * stream );
202+
187203/**
188204 * Creates an event. An event is a synchronization marker that can be
189205 * appended to a stream to monitor device progress or synchronize the
@@ -193,7 +209,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193209 * @param[IN] dev_id Associated device for the event or
194210 * MCA_ACCELERATOR_NO_DEVICE_ID
195211 * @param[OUT] event Event to create
196- * @param[IN] enable_ipc support inter-process tracking of the event
212+ * @param[IN] enable_ipc support inter-process tracking of the event
197213 *
198214 * @return OPAL_SUCCESS or error status on failure.
199215 */
@@ -310,6 +326,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310326 int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
311327 opal_accelerator_transfer_type_t type );
312328
329+
330+ /**
331+ * Copies memory asynchronously from src to dest. Memory of dest and src
332+ * may overlap. The caller can optionally specify the transfer type to
333+ * avoid pointer detection for performance. The operations will be enqueued
334+ * into the provided stream but are not guaranteed to be complete upon return.
335+ *
336+ * @param[IN] dest_dev_id Associated device to copy to or
337+ * MCA_ACCELERATOR_NO_DEVICE_ID
338+ * @param[IN] src_dev_id Associated device to copy from or
339+ * MCA_ACCELERATOR_NO_DEVICE_ID
340+ * @param[IN] dest Destination to copy memory to
341+ * @param[IN] src Source to copy memory from
342+ * @param[IN] size Size of memory to copy
343+ * @param[IN] stream Stream to perform asynchronous move on
344+ * @param[IN] type Transfer type field for performance
345+ * Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
346+ * if caller is unsure of the transfer direction.
347+ *
348+ * @return OPAL_SUCCESS or error status on failure
349+ */
350+ typedef int (* opal_accelerator_base_module_memmove_async_fn_t )(
351+ int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
352+ opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
353+
313354/**
314355 * Allocates size bytes memory from the device and sets ptr to the
315356 * pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +381,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340381typedef int (* opal_accelerator_base_module_mem_release_fn_t )(
341382 int dev_id , void * ptr );
342383
384+
385+ /**
386+ * Allocates size bytes memory from the device and sets ptr to the
387+ * pointer of the allocated memory. The memory is not initialized.
388+ * The allocation request is placed into the stream object.
389+ * The memory must not be used before this operation has
390+ * completed on the stream.
391+ *
392+ * @param[IN] dev_id Associated device for the allocation or
393+ * MCA_ACCELERATOR_NO_DEVICE_ID
394+ * @param[OUT] ptr Returns pointer to allocated memory
395+ * @param[IN] size Size of memory to allocate
396+ * @param[IN] stream Stream into which to insert the allocation request
397+ *
398+ * @return OPAL_SUCCESS or error status on failure
399+ */
400+ typedef int (* opal_accelerator_base_module_mem_alloc_stream_fn_t )(
401+ int dev_id , void * * ptr , size_t size , opal_accelerator_stream_t * stream );
402+
403+ /**
404+ * Frees the memory space pointed to by ptr which has been returned by
405+ * a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
406+ * If the function is called on a ptr that has already been freed,
407+ * undefined behavior occurs. If ptr is NULL, no operation is performed,
408+ * and the function returns OPAL_SUCCESS.
409+ * The release of the memory will be inserted into the stream and occurs after
410+ * all previous operations have completed.
411+ *
412+ * @param[IN] dev_id Associated device for the allocation or
413+ * MCA_ACCELERATOR_NO_DEVICE_ID
414+ * @param[IN] ptr Pointer to free
415+ * @param[IN] stream Stream into which to insert the free operation
416+ *
417+ * @return OPAL_SUCCESS or error status on failure
418+ */
419+ typedef int (* opal_accelerator_base_module_mem_release_stream_fn_t )(
420+ int dev_id , void * ptr , opal_accelerator_stream_t * stream );
421+
422+
423+
343424/**
344425 * Retrieves the base address and/or size of a memory allocation of the
345426 * device.
@@ -557,6 +638,35 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557638typedef int (* opal_accelerator_base_module_get_buffer_id_fn_t )(
558639 int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
559640
641+ /**
642+ * Wait for the completion of all operations inserted into the stream.
643+ *
644+ * @param[IN] stream The stream to wait for.
645+ *
646+ * @return OPAL_SUCCESS or error status on failure
647+ */
648+ typedef int (* opal_accelerator_base_module_wait_stream_fn_t )(opal_accelerator_stream_t * stream );
649+
650+ /**
651+ * Get the number of devices available.
652+ *
653+ * @param[OUT] num_devices Number of devices.
654+ *
655+ * @return OPAL_SUCCESS or error status on failure
656+ */
657+ typedef int (* opal_accelerator_base_module_get_num_devices_fn_t )(int * num_devices );
658+
659+ /**
660+ * Get the memory bandwidth of the device.
661+ *
662+ * @param[IN] device The device to query.
663+ * @param[OUT] bw The returned bandwidth for the device.
664+ *
665+ * @return OPAL_SUCCESS or error status on failure
666+ */
667+ typedef int (* opal_accelerator_base_module_get_mem_bw_fn_t )(int device , float * bw );
668+
669+
560670/*
561671 * the standard public API data structure
562672 */
@@ -572,10 +682,13 @@ typedef struct {
572682
573683 opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async ;
574684 opal_accelerator_base_module_memcpy_fn_t mem_copy ;
685+ opal_accelerator_base_module_memmove_async_fn_t mem_move_async ;
575686 opal_accelerator_base_module_memmove_fn_t mem_move ;
576687
577688 opal_accelerator_base_module_mem_alloc_fn_t mem_alloc ;
578689 opal_accelerator_base_module_mem_release_fn_t mem_release ;
690+ opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream ;
691+ opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream ;
579692 opal_accelerator_base_module_get_address_range_fn_t get_address_range ;
580693
581694 opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled ;
@@ -595,6 +708,10 @@ typedef struct {
595708 opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer ;
596709
597710 opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id ;
711+
712+ opal_accelerator_base_module_wait_stream_fn_t wait_stream ;
713+ opal_accelerator_base_module_get_num_devices_fn_t num_devices ;
714+ opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw ;
598715} opal_accelerator_base_module_t ;
599716
600717/**
0 commit comments