|
| 1 | +/*===- __cuda_runtime.h - LLVM/Offload wrappers for CUDA runtime API -------=== |
| 2 | + * |
| 3 | + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | + * See https://llvm.org/LICENSE.txt for license information. |
| 5 | + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | + * |
| 7 | + *===-----------------------------------------------------------------------=== |
| 8 | + */ |
| 9 | + |
| 10 | +#ifndef __CUDA_RUNTIME_API__ |
| 11 | +#define __CUDA_RUNTIME_API__ |
| 12 | + |
#include <cstddef>
#include <optional>
#include <utility>
| 15 | + |
// OpenMP offload runtime entry points this header is implemented on top of.
extern "C" {
// Returns the device number of the host (the initial device).
int omp_get_initial_device(void);
// Frees memory previously allocated with omp_target_alloc on `Device`.
void omp_target_free(void *Ptr, int Device);
// Allocates `Size` bytes on `Device`; returns NULL on failure.
void *omp_target_alloc(size_t Size, int Device);
// Copies `Length` bytes between `SrcDevice` and `DstDevice`; returns zero on
// success and non-zero on failure.
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
                      size_t DstOffset, size_t SrcOffset, int DstDevice,
                      int SrcDevice);
// Byte-wise fills `N` bytes at `Ptr` on `DeviceNum` with `C`; a null return
// indicates failure.
void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum);
}
| 25 | + |
// Subset of the CUDA runtime error codes recognized by this wrapper. The
// numeric values mirror the real CUDA runtime so comparisons against
// hard-coded constants in user code keep working.
// TODO: There are many fields missing in this enumeration.
typedef enum cudaError {
  cudaSuccess = 0,
  cudaErrorInvalidValue = 1,
  cudaErrorMemoryAllocation = 2,
  cudaErrorNoDevice = 100,
  cudaErrorInvalidDevice = 101,
  // Catch-all for failures this wrapper cannot map onto a real CUDA code.
  cudaErrorOTHER = -1,
} cudaError_t;
| 35 | + |
// Direction of a cudaMemcpy transfer; values mirror the CUDA runtime API.
enum cudaMemcpyKind {
  cudaMemcpyHostToHost = 0,
  cudaMemcpyHostToDevice = 1,
  cudaMemcpyDeviceToHost = 2,
  cudaMemcpyDeviceToDevice = 3,
  // In real CUDA the direction is inferred from the pointer values (unified
  // virtual addressing).
  cudaMemcpyDefault = 4
};
| 43 | + |
// Streams are not implemented yet; the type exists only so that signatures
// can match the CUDA runtime API.
typedef void *cudaStream_t;

// Sticky per-thread last-error state. Every wrapper call below assigns it;
// it is read (and optionally cleared) via cudaGetLastError /
// cudaPeekAtLastError.
static thread_local cudaError_t __cudaomp_last_error = cudaSuccess;
| 47 | + |
| 48 | +// Returns the last error that has been produced and resets it to cudaSuccess. |
| 49 | +inline cudaError_t cudaGetLastError() { |
| 50 | + cudaError_t TempError = __cudaomp_last_error; |
| 51 | + __cudaomp_last_error = cudaSuccess; |
| 52 | + return TempError; |
| 53 | +} |
| 54 | + |
// Returns the last error that has been produced without resetting it.
inline cudaError_t cudaPeekAtLastError() { return __cudaomp_last_error; }
| 57 | + |
| 58 | +inline cudaError_t __cudaMalloc(void **devPtr, size_t size) { |
| 59 | + int DeviceNum = 0; |
| 60 | + *devPtr = omp_target_alloc(size, DeviceNum); |
| 61 | + if (*devPtr == NULL) |
| 62 | + return __cudaomp_last_error = cudaErrorMemoryAllocation; |
| 63 | + |
| 64 | + return __cudaomp_last_error = cudaSuccess; |
| 65 | +} |
| 66 | + |
| 67 | +template <class T> cudaError_t cudaMalloc(T **devPtr, size_t size) { |
| 68 | + return __cudaMalloc((void **)devPtr, size); |
| 69 | +} |
| 70 | + |
| 71 | +inline cudaError_t __cudaFree(void *devPtr) { |
| 72 | + int DeviceNum = 0; |
| 73 | + omp_target_free(devPtr, DeviceNum); |
| 74 | + return __cudaomp_last_error = cudaSuccess; |
| 75 | +} |
| 76 | + |
| 77 | +template <class T> inline cudaError_t cudaFree(T *ptr) { |
| 78 | + return __cudaFree((void *)ptr); |
| 79 | +} |
| 80 | + |
| 81 | +inline cudaError_t __cudaMemcpy(void *dst, const void *src, size_t count, |
| 82 | + cudaMemcpyKind kind) { |
| 83 | + // get the host device number (which is the inital device) |
| 84 | + int HostDeviceNum = omp_get_initial_device(); |
| 85 | + |
| 86 | + // use the default device for gpu |
| 87 | + int GPUDeviceNum = 0; |
| 88 | + |
| 89 | + // default to copy from host to device |
| 90 | + int DstDeviceNum = GPUDeviceNum; |
| 91 | + int SrcDeviceNum = HostDeviceNum; |
| 92 | + |
| 93 | + if (kind == cudaMemcpyDeviceToHost) |
| 94 | + std::swap(DstDeviceNum, SrcDeviceNum); |
| 95 | + |
| 96 | + // omp_target_memcpy returns 0 on success and non-zero on failure |
| 97 | + if (omp_target_memcpy(dst, src, count, 0, 0, DstDeviceNum, SrcDeviceNum)) |
| 98 | + return __cudaomp_last_error = cudaErrorInvalidValue; |
| 99 | + return __cudaomp_last_error = cudaSuccess; |
| 100 | +} |
| 101 | + |
| 102 | +template <class T> |
| 103 | +inline cudaError_t cudaMemcpy(T *dst, const T *src, size_t count, |
| 104 | + cudaMemcpyKind kind) { |
| 105 | + return __cudaMemcpy((void *)dst, (const void *)src, count, kind); |
| 106 | +} |
| 107 | + |
| 108 | +inline cudaError_t __cudaMemset(void *devPtr, int value, size_t count, |
| 109 | + cudaStream_t stream = 0) { |
| 110 | + int DeviceNum = 0; |
| 111 | + if (!omp_target_memset(devPtr, value, count, DeviceNum)) |
| 112 | + return __cudaomp_last_error = cudaErrorInvalidValue; |
| 113 | + return __cudaomp_last_error = cudaSuccess; |
| 114 | +} |
| 115 | + |
| 116 | +template <class T> |
| 117 | +inline cudaError_t cudaMemset(T *devPtr, int value, size_t count) { |
| 118 | + return __cudaMemset((void *)devPtr, value, count); |
| 119 | +} |
| 120 | + |
| 121 | +inline cudaError_t cudaDeviceSynchronize() { |
| 122 | + // TODO: not implemented, not async yet. |
| 123 | + return __cudaomp_last_error = cudaSuccess; |
| 124 | +} |
| 125 | + |
| 126 | +inline cudaError_t cudaDeviceReset(void) { |
| 127 | + // TODO: not implemented. |
| 128 | + return __cudaomp_last_error = cudaSuccess; |
| 129 | +} |
| 130 | + |
| 131 | +#endif |
0 commit comments