55 * Copyright (c) Amazon.com, Inc. or its affiliates.
66 * All Rights reserved.
77 * Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+ * Copyright (c) 2024 The University of Tennessee and The University
9+ * of Tennessee Research Foundation. All rights
10+ * reserved.
811 *
912 * $COPYRIGHT$
1013 *
@@ -193,7 +196,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193196 * @param[IN] dev_id Associated device for the event or
194197 * MCA_ACCELERATOR_NO_DEVICE_ID
195198 * @param[OUT] event Event to create
196- * @param[IN] enable_ipc support inter-process tracking of the event
199+ * @param[IN] enable_ipc support inter-process tracking of the event
197200 *
198201 * @return OPAL_SUCCESS or error status on failure.
199202 */
@@ -310,6 +313,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310313 int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
311314 opal_accelerator_transfer_type_t type );
312315
316+
317+ /**
318+ * Copies memory asynchronously from src to dest. Memory of dest and src
319+ * may overlap. The transfer type can optionally be specified to
320+ * avoid pointer detection for performance. The operation is enqueued
321+ * into the provided stream and is not guaranteed to be complete upon return.
322+ *
323+ * @param[IN] dest_dev_id Associated device to copy to or
324+ * MCA_ACCELERATOR_NO_DEVICE_ID
325+ * @param[IN] src_dev_id Associated device to copy from or
326+ * MCA_ACCELERATOR_NO_DEVICE_ID
327+ * @param[IN] dest Destination to copy memory to
328+ * @param[IN] src Source to copy memory from
329+ * @param[IN] size Size of memory to copy
330+ * @param[IN] stream Stream to perform asynchronous move on
331+ * @param[IN] type Transfer type field for performance
332+ * Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
333+ * if caller is unsure of the transfer direction.
334+ *
335+ * @return OPAL_SUCCESS or error status on failure
336+ */
337+ typedef int (* opal_accelerator_base_module_memmove_async_fn_t )(
338+ int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
339+ opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
340+
313341/**
314342 * Allocates size bytes memory from the device and sets ptr to the
315343 * pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +368,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340368typedef int (* opal_accelerator_base_module_mem_release_fn_t )(
341369 int dev_id , void * ptr );
342370
371+
372+ /**
373+ * Allocates size bytes memory from the device and sets ptr to the
374+ * pointer of the allocated memory. The memory is not initialized.
375+ * The allocation request is placed into the stream object.
376+ * Any use of the memory must follow the completion of this
377+ * operation on the stream.
378+ *
379+ * @param[IN] dev_id Associated device for the allocation or
380+ * MCA_ACCELERATOR_NO_DEVICE_ID
381+ * @param[OUT] ptr Returns pointer to allocated memory
382+ * @param[IN] size Size of memory to allocate
383+ * @param[IN] stream Stream into which to insert the allocation request
384+ *
385+ * @return OPAL_SUCCESS or error status on failure
386+ */
387+ typedef int (* opal_accelerator_base_module_mem_alloc_stream_fn_t )(
388+ int dev_id , void * * ptr , size_t size , opal_accelerator_stream_t * stream );
389+
390+ /**
391+ * Frees the memory space pointed to by ptr which has been returned by
392+ * a previous call to opal_accelerator_base_module_mem_alloc_stream_fn_t().
393+ * If the function is called on a ptr that has already been freed,
394+ * undefined behavior occurs. If ptr is NULL, no operation is performed,
395+ * and the function returns OPAL_SUCCESS.
396+ * The release of the memory is inserted into the stream and occurs after
397+ * all previous operations have completed.
398+ *
399+ * @param[IN] dev_id Associated device for the allocation or
400+ * MCA_ACCELERATOR_NO_DEVICE_ID
401+ * @param[IN] ptr Pointer to free
402+ * @param[IN] stream Stream into which to insert the free operation
403+ *
404+ * @return OPAL_SUCCESS or error status on failure
405+ */
406+ typedef int (* opal_accelerator_base_module_mem_release_stream_fn_t )(
407+ int dev_id , void * ptr , opal_accelerator_stream_t * stream );
408+
409+
410+
343411/**
344412 * Retrieves the base address and/or size of a memory allocation of the
345413 * device.
@@ -557,6 +625,35 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557625typedef int (* opal_accelerator_base_module_get_buffer_id_fn_t )(
558626 int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
559627
628+ /**
629+ * Wait for the completion of all operations inserted into the stream.
630+ *
631+ * @param[IN] stream The stream to wait for.
632+ *
633+ * @return OPAL_SUCCESS or error status on failure
634+ */
635+ typedef int (* opal_accelerator_base_module_wait_stream_fn_t )(opal_accelerator_stream_t * stream );
636+
637+ /**
638+ * Get the number of devices available.
639+ *
640+ * @param[OUT] num_devices Number of devices.
641+ *
642+ * @return OPAL_SUCCESS or error status on failure
643+ */
644+ typedef int (* opal_accelerator_base_module_get_num_devices_fn_t )(int * num_devices );
645+
646+ /**
647+ * Get the memory bandwidth of the device.
648+ *
649+ * @param[IN] device The device to query.
650+ * @param[OUT] bw The returned bandwidth for the device. NOTE(review): units are not stated here - confirm with implementations.
651+ *
652+ * @return OPAL_SUCCESS or error status on failure
653+ */
654+ typedef int (* opal_accelerator_base_module_get_mem_bw_fn_t )(int device , float * bw );
655+
656+
560657/*
561658 * the standard public API data structure
562659 */
@@ -572,10 +669,13 @@ typedef struct {
572669
573670 opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async ;
574671 opal_accelerator_base_module_memcpy_fn_t mem_copy ;
672+ opal_accelerator_base_module_memmove_async_fn_t mem_move_async ;
575673 opal_accelerator_base_module_memmove_fn_t mem_move ;
576674
577675 opal_accelerator_base_module_mem_alloc_fn_t mem_alloc ;
578676 opal_accelerator_base_module_mem_release_fn_t mem_release ;
677+ opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream ;
678+ opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream ;
579679 opal_accelerator_base_module_get_address_range_fn_t get_address_range ;
580680
581681 opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled ;
@@ -595,6 +695,10 @@ typedef struct {
595695 opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer ;
596696
597697 opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id ;
698+
699+ opal_accelerator_base_module_wait_stream_fn_t wait_stream ;
700+ opal_accelerator_base_module_get_num_devices_fn_t num_devices ;
701+ opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw ;
598702} opal_accelerator_base_module_t ;
599703
600704/**
0 commit comments