Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 71 additions & 17 deletions opal/mca/accelerator/rocm/accelerator_rocm_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ static int accelerator_rocm_component_register(void)

int opal_accelerator_rocm_lazy_init()
{
hipError_t hip_err;
int err = OPAL_SUCCESS;

/* Double checked locking to avoid having to
Expand All @@ -196,41 +197,94 @@ int opal_accelerator_rocm_lazy_init()
goto out;
}

err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
if (hipSuccess != err) {
hip_err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device count, err=%d %s\n",
err, hipGetErrorString(err));
err = OPAL_ERROR;
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

hipStream_t memcpy_stream;
err = hipStreamCreate(&memcpy_stream);
if (hipSuccess != err) {
hip_err = hipStreamCreate(&memcpy_stream);
if (hipSuccess != hip_err) {
opal_output(0, "Could not create hipStream, err=%d %s\n",
err, hipGetErrorString(err));
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
goto out;
}

opal_accelerator_rocm_MemcpyStream = malloc(sizeof(hipStream_t));
if (NULL == opal_accelerator_rocm_MemcpyStream) {
opal_output(0, "Could not allocate hipStream\n");
err = OPAL_ERR_OUT_OF_RESOURCE;
goto out;
}
*opal_accelerator_rocm_MemcpyStream = memcpy_stream;

opal_accelerator_rocm_mem_bw = malloc(sizeof(float)*opal_accelerator_rocm_num_devices);
if (NULL == opal_accelerator_rocm_mem_bw) {
opal_output(0, "Could not allocate memory_bw array\n");
err = OPAL_ERR_OUT_OF_RESOURCE;
goto out;
}

for (int i = 0; i < opal_accelerator_rocm_num_devices; ++i) {
int mem_clock_rate; // kHz
err = hipDeviceGetAttribute(&mem_clock_rate,
hipDeviceAttributeMemoryClockRate,
i);
hip_err = hipDeviceGetAttribute(&mem_clock_rate,
hipDeviceAttributeMemoryClockRate,
i);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device MemoryClockRate, err=%d %s\n",
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

int bus_width; // bit
err = hipDeviceGetAttribute(&bus_width,
hipDeviceAttributeMemoryBusWidth,
i);
/* bw = clock_rate * bus width * 2bit multiplier
* See https://forums.developer.nvidia.com/t/memory-clock-rate/107940
*/
hip_err = hipDeviceGetAttribute(&bus_width,
hipDeviceAttributeMemoryBusWidth,
i);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device MemoryBusWidth, err=%d %s\n",
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

/* bw = clock_rate * bus width * 2bit multiplier */
float bw = ((float)mem_clock_rate*(float)bus_width*2.0) / 1024 / 1024 / 8;
opal_accelerator_rocm_mem_bw[i] = bw;
}

#if HIP_VERSION >= 60000000
int dev_id;
hip_err = hipGetDevice(&dev_id);
if (hipSuccess != hip_err) {
opal_output(0, "error retrieving current device");
err = OPAL_ERROR;
goto out;
}

int has_large_bar = 0;
hip_err = hipDeviceGetAttribute (&has_large_bar, hipDeviceAttributeIsLargeBar,
dev_id);
if (hipSuccess != hip_err) {
opal_output(0, "error retrieving current device");
err = OPAL_ERROR;
goto out;
}

if (0 == has_large_bar) {
// Without large BAR we have to use hipMemcpy(Async) for all data transfers
opal_output(0, "Large BAR support is not enabled on current device. "
"Enable large BAR support in BIOS (Above 4G Encoding) for "
"better performance\n.");
opal_accelerator_rocm_memcpyH2D_limit = 0;
opal_accelerator_rocm_memcpyD2H_limit = 0;
}
#endif

err = OPAL_SUCCESS;
opal_atomic_wmb();
accelerator_rocm_init_complete = true;
Expand Down
Loading