@@ -201,63 +201,43 @@ static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
201
201
202
202
static_assert (sizeof (half) == sizeof(ggml_fp16_t ), "wrong fp16 size");
203
203
204
- #define CUDA_CHECK (err ) \
205
- do { \
206
- cudaError_t err_ = (err); \
207
- if (err_ != cudaSuccess) { \
208
- int id; \
209
- cudaGetDevice (&id); \
210
- fprintf (stderr, " \n CUDA error %d at %s:%d: %s\n " , err_, __FILE__, __LINE__, \
211
- cudaGetErrorString (err_)); \
212
- fprintf (stderr, " current device: %d\n " , id); \
213
- GGML_ASSERT (!" CUDA error" ); \
214
- } \
215
- } while (0 )
216
-
217
- // driver API
218
- #define CU_CHECK (err ) \
219
- do { \
220
- CUresult err_ = (err); \
221
- if (err_ != CUDA_SUCCESS) { \
222
- int id; \
223
- cuDeviceGet (&id, 0 ); \
224
- const char * err_str; \
225
- cuGetErrorString (err_, &err_str); \
226
- fprintf (stderr, " \n CUDA error %d at %s:%d: %s\n " , err_, __FILE__, __LINE__, \
227
- err_str); \
228
- fprintf (stderr, " %s\n " , #err); \
229
- fprintf (stderr, " current device: %d\n " , id); \
230
- GGML_ASSERT (!" CUDA error" ); \
231
- } \
232
- } while (0 )
233
-
234
-
235
204
#if CUDART_VERSION >= 12000
236
- #define CUBLAS_CHECK (err ) \
237
- do { \
238
- cublasStatus_t err_ = (err); \
239
- if (err_ != CUBLAS_STATUS_SUCCESS) { \
240
- int id; \
241
- cudaGetDevice (&id); \
242
- fprintf (stderr, " \n cuBLAS error %d at %s:%d: %s\n " , \
243
- err_, __FILE__, __LINE__, cublasGetStatusString (err_)); \
244
- fprintf (stderr, " current device: %d\n " , id); \
245
- GGML_ASSERT (!" cuBLAS error" ); \
246
- } \
247
- } while (0 )
205
+ static const char * cublas_get_error_str (const cublasStatus_t err) {
206
+ return cublasGetStatusString (err);
207
+ }
248
208
#else
249
- #define CUBLAS_CHECK (err ) \
250
- do { \
251
- cublasStatus_t err_ = (err); \
252
- if (err_ != CUBLAS_STATUS_SUCCESS) { \
253
- int id; \
254
- cudaGetDevice (&id); \
255
- fprintf (stderr, " \n cuBLAS error %d at %s:%d\n " , err_, __FILE__, __LINE__); \
256
- fprintf (stderr, " current device: %d\n " , id); \
257
- GGML_ASSERT (!" cuBLAS error" ); \
258
- } \
259
- } while (0 )
260
- #endif // CUDART_VERSION >= 11
209
+ static const char * cublas_get_error_str (const cublasStatus_t err) {
210
+ switch (err) {
211
+ case CUBLAS_STATUS_SUCCESS: return " CUBLAS_STATUS_SUCCESS" ;
212
+ case CUBLAS_STATUS_NOT_INITIALIZED: return " CUBLAS_STATUS_NOT_INITIALIZED" ;
213
+ case CUBLAS_STATUS_ALLOC_FAILED: return " CUBLAS_STATUS_ALLOC_FAILED" ;
214
+ case CUBLAS_STATUS_INVALID_VALUE: return " CUBLAS_STATUS_INVALID_VALUE" ;
215
+ case CUBLAS_STATUS_ARCH_MISMATCH: return " CUBLAS_STATUS_ARCH_MISMATCH" ;
216
+ case CUBLAS_STATUS_MAPPING_ERROR: return " CUBLAS_STATUS_MAPPING_ERROR" ;
217
+ case CUBLAS_STATUS_EXECUTION_FAILED: return " CUBLAS_STATUS_EXECUTION_FAILED" ;
218
+ case CUBLAS_STATUS_INTERNAL_ERROR: return " CUBLAS_STATUS_INTERNAL_ERROR" ;
219
+ case CUBLAS_STATUS_NOT_SUPPORTED: return " CUBLAS_STATUS_NOT_SUPPORTED" ;
220
+ case CUBLAS_STATUS_LICENSE_ERROR: return " CUBLAS_STATUS_LICENSE_ERROR" ;
221
+ default : return " unknown error" ;
222
+ }
223
+ #endif // CUDART_VERSION >= 12000
224
+
225
+ static const char * cu_get_error_str (CUresult err) {
226
+ const char * err_str;
227
+ cuGetErrorString (err, &err_str);
228
+ return err_str;
229
+ }
230
+
231
+ [[noreturn]]
232
+ static void ggml_cuda_error (const char * stmt, const char * func, const char * file, const int line, const char * msg) {
233
+ fprintf (stderr, " CUDA error: %s: %s\n " , stmt, msg);
234
+ fprintf (stderr, " in function %s at %s:%d\n " , func, file, line);
235
+ GGML_ASSERT (!" CUDA error" );
236
+ }
237
+
238
+ #define CUDA_CHECK (err ) do { auto err_ = (err); if (err_ != cudaSuccess) ggml_cuda_error (#err, __func__, __FILE__, __LINE__, cudaGetErrorString (err_)); } while (0 )
239
+ #define CUBLAS_CHECK (err ) do { auto err_ = (err); if (err_ != CUBLAS_STATUS_SUCCESS) ggml_cuda_error (#err, __func__, __FILE__, __LINE__, cublas_get_error_str (err_)); } while (0 )
240
+ #define CU_CHECK (err ) do { auto err_ = (err); if (err_ != CUDA_SUCCESS) ggml_cuda_error (#err, __func__, __FILE__, __LINE__, cu_get_error_str (err_)); } while (0 )
261
241
262
242
#if CUDART_VERSION >= 11100
263
243
#define GGML_CUDA_ASSUME (x ) __builtin_assume(x)
@@ -537,13 +517,13 @@ static int g_device_count = -1;
537
517
static int g_main_device = 0 ;
538
518
static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0 };
539
519
540
- struct device_capabilities {
520
+ struct cuda_device_capabilities {
541
521
int cc; // compute capability
542
522
bool vmm; // virtual memory support
543
523
size_t vmm_granularity; // granularity of virtual memory
544
524
};
545
525
546
- static device_capabilities g_device_caps[GGML_CUDA_MAX_DEVICES] = { {0 , false , 0 } };
526
+ static cuda_device_capabilities g_device_caps[GGML_CUDA_MAX_DEVICES] = { {0 , false , 0 } };
547
527
548
528
549
529
static void * g_scratch_buffer = nullptr ;
0 commit comments