|
12 | 12 |
|
13 | 13 | #include "common.hpp" |
14 | 14 |
|
15 | | -int get_main_device(){ |
16 | | - return g_main_device; |
| 15 | +int get_main_device() { |
| 16 | + return g_main_device; |
17 | 17 | } |
18 | 18 |
|
19 | 19 | void check_allow_gpu_index(const int device_index) { |
20 | | - if (device_index >= g_device_count) { |
21 | | - char error_buf[256]; |
22 | | - snprintf(error_buf, sizeof(error_buf), |
23 | | - "%s error: device_index:%d is out of range: [0-%d]", __func__, |
24 | | - device_index, g_device_count - 1); |
25 | | - fprintf(stderr, "%s\n", error_buf); |
26 | | - assert(false); |
27 | | - } |
| 20 | + if (device_index >= g_device_count) { |
| 21 | + char error_buf[256]; |
| 22 | + snprintf( |
| 23 | + error_buf, |
| 24 | + sizeof(error_buf), |
| 25 | + "%s error: device_index:%d is out of range: [0-%d]", |
| 26 | + __func__, |
| 27 | + device_index, |
| 28 | + g_device_count - 1); |
| 29 | + fprintf(stderr, "%s\n", error_buf); |
| 30 | + assert(false); |
| 31 | + } |
28 | 32 | } |
29 | 33 |
|
30 | 34 | void check_allow_gpu_id(const int device_id) { |
31 | | - if (!g_sycl_gpu_mgr->is_allowed_gpu(device_id)) { |
32 | | - char error_buf[256]; |
33 | | - snprintf(error_buf, sizeof(error_buf), |
34 | | - "error: cannot set device=%d, which is not allowed. Please " |
35 | | - "set GPU ID in: [%s]", |
36 | | - device_id, g_sycl_gpu_mgr->gpus_list.c_str()); |
37 | | - fprintf(stderr, "%s\n", error_buf); |
38 | | - throw std::invalid_argument(error_buf); |
39 | | - } |
| 35 | + if (!g_sycl_gpu_mgr->is_allowed_gpu(device_id)) { |
| 36 | + char error_buf[256]; |
| 37 | + snprintf( |
| 38 | + error_buf, |
| 39 | + sizeof(error_buf), |
| 40 | + "error: cannot set device=%d, which is not allowed. Please " |
| 41 | + "set GPU ID in: [%s]", |
| 42 | + device_id, |
| 43 | + g_sycl_gpu_mgr->gpus_list.c_str()); |
| 44 | + fprintf(stderr, "%s\n", error_buf); |
| 45 | + throw std::invalid_argument(error_buf); |
| 46 | + } |
40 | 47 | } |
41 | 48 |
|
42 | 49 | int get_current_device_id() { |
43 | | - return dpct::dev_mgr::instance().current_device_id(); |
| 50 | + return dpct::dev_mgr::instance().current_device_id(); |
44 | 51 | } |
45 | 52 |
|
46 | | -void log_ggml_var_device(const char*name, float *src, size_t total_elements, bool src_on_device){ |
47 | | - if(!g_ggml_sycl_debug) return; |
48 | | - if(!src){ |
49 | | - printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
50 | | - return; |
51 | | - } |
52 | | - char filename[1024]; |
53 | | - sprintf(filename, "%s.txt", name); |
54 | | - printf("GGML Tensor:%s save to %s\n", name, filename); |
55 | | - |
56 | | - size_t total_size = total_elements*sizeof(float); |
57 | | - float *local_buf = NULL; |
58 | | - if(src_on_device) { |
59 | | - local_buf = (float *) ggml_sycl_host_malloc(total_size); |
60 | | - ggml_sycl_set_device(g_main_device); |
61 | | - dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
62 | | - main_stream->memcpy(local_buf, src, total_size).wait(); |
63 | | - } |
64 | | - else { |
65 | | - local_buf = (float *)src; |
66 | | - } |
67 | | - |
68 | | - std::ofstream logfile; |
69 | | - logfile.open(filename); |
70 | | - for(size_t i=0; i<total_elements; i++){ |
71 | | - logfile << local_buf[i] <<" "; |
72 | | - if((i+1)%20 ==0) logfile <<std::endl; |
73 | | - } |
74 | | - logfile <<std::endl; |
75 | | - logfile.close(); |
76 | | - |
77 | | - if(src_on_device) ggml_sycl_host_free(local_buf); |
| 53 | +void log_ggml_var_device( |
| 54 | + const char* name, |
| 55 | + float* src, |
| 56 | + size_t total_elements, |
| 57 | + bool src_on_device) { |
| 58 | + if (!g_ggml_sycl_debug) |
| 59 | + return; |
| 60 | + if (!src) { |
| 61 | + printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
| 62 | + return; |
| 63 | + } |
| 64 | + char filename[1024]; |
| 65 | + sprintf(filename, "%s.txt", name); |
| 66 | + printf("GGML Tensor:%s save to %s\n", name, filename); |
| 67 | + |
| 68 | + size_t total_size = total_elements * sizeof(float); |
| 69 | + float* local_buf = NULL; |
| 70 | + if (src_on_device) { |
| 71 | + local_buf = (float*)ggml_sycl_host_malloc(total_size); |
| 72 | + ggml_sycl_set_device(g_main_device); |
| 73 | + dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
| 74 | + main_stream->memcpy(local_buf, src, total_size).wait(); |
| 75 | + } else { |
| 76 | + local_buf = (float*)src; |
| 77 | + } |
| 78 | + |
| 79 | + std::ofstream logfile; |
| 80 | + logfile.open(filename); |
| 81 | + for (size_t i = 0; i < total_elements; i++) { |
| 82 | + logfile << local_buf[i] << " "; |
| 83 | + if ((i + 1) % 20 == 0) |
| 84 | + logfile << std::endl; |
| 85 | + } |
| 86 | + logfile << std::endl; |
| 87 | + logfile.close(); |
| 88 | + |
| 89 | + if (src_on_device) |
| 90 | + ggml_sycl_host_free(local_buf); |
78 | 91 | } |
79 | 92 |
|
80 | | -void log_ggml_var_device_fp16(const char*name, sycl::half *src, size_t total_elements, bool src_on_device){ |
81 | | - if(!g_ggml_sycl_debug) return; |
82 | | - if(!src){ |
83 | | - printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
84 | | - return; |
85 | | - } |
86 | | - char filename[1024]; |
87 | | - sprintf(filename, "%s.txt", name); |
88 | | - printf("GGML Tensor:%s save to %s\n", name, filename); |
89 | | - |
90 | | - size_t total_size = total_elements*sizeof(sycl::half); |
91 | | - sycl::half *local_buf = NULL; |
92 | | - if(src_on_device) { |
93 | | - local_buf = (sycl::half *) ggml_sycl_host_malloc(total_size); |
94 | | - ggml_sycl_set_device(g_main_device); |
95 | | - dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
96 | | - main_stream->memcpy(local_buf, src, total_size).wait(); |
97 | | - } |
98 | | - else { |
99 | | - local_buf = (sycl::half *)src; |
100 | | - } |
101 | | - |
102 | | - std::ofstream logfile; |
103 | | - logfile.open(filename); |
104 | | - for(size_t i=0; i<total_elements; i++){ |
105 | | - logfile << local_buf[i] <<" "; |
106 | | - if((i+1)%20 ==0) logfile <<std::endl; |
107 | | - } |
108 | | - logfile <<std::endl; |
109 | | - logfile.close(); |
110 | | - |
111 | | - if(src_on_device) ggml_sycl_host_free(local_buf); |
| 93 | +void log_ggml_var_device_fp16( |
| 94 | + const char* name, |
| 95 | + sycl::half* src, |
| 96 | + size_t total_elements, |
| 97 | + bool src_on_device) { |
| 98 | + if (!g_ggml_sycl_debug) |
| 99 | + return; |
| 100 | + if (!src) { |
| 101 | + printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
| 102 | + return; |
| 103 | + } |
| 104 | + char filename[1024]; |
| 105 | + sprintf(filename, "%s.txt", name); |
| 106 | + printf("GGML Tensor:%s save to %s\n", name, filename); |
| 107 | + |
| 108 | + size_t total_size = total_elements * sizeof(sycl::half); |
| 109 | + sycl::half* local_buf = NULL; |
| 110 | + if (src_on_device) { |
| 111 | + local_buf = (sycl::half*)ggml_sycl_host_malloc(total_size); |
| 112 | + ggml_sycl_set_device(g_main_device); |
| 113 | + dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
| 114 | + main_stream->memcpy(local_buf, src, total_size).wait(); |
| 115 | + } else { |
| 116 | + local_buf = (sycl::half*)src; |
| 117 | + } |
| 118 | + |
| 119 | + std::ofstream logfile; |
| 120 | + logfile.open(filename); |
| 121 | + for (size_t i = 0; i < total_elements; i++) { |
| 122 | + logfile << local_buf[i] << " "; |
| 123 | + if ((i + 1) % 20 == 0) |
| 124 | + logfile << std::endl; |
| 125 | + } |
| 126 | + logfile << std::endl; |
| 127 | + logfile.close(); |
| 128 | + |
| 129 | + if (src_on_device) |
| 130 | + ggml_sycl_host_free(local_buf); |
112 | 131 | } |
113 | 132 |
|
114 | | -void print_ggml_tensor(const char*name, struct ggml_tensor *src){ |
115 | | - if(!g_ggml_sycl_debug) return; |
116 | | - if(!src){ |
117 | | - printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
118 | | - return; |
119 | | - } |
120 | | - |
121 | | - size_t total_elements = ggml_nelements(src); |
122 | | - |
123 | | - const bool src_on_device = src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT; |
124 | | - float *src_data =NULL; |
125 | | - if(src_on_device) { |
126 | | - ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra; |
127 | | - src_data = (float*)src_extra->data_device[g_main_device]; |
128 | | - } |
129 | | - else { |
130 | | - src_data = (float *)src->data; |
131 | | - } |
132 | | - |
133 | | - log_ggml_var_device(name, src_data, total_elements, src_on_device); |
| 133 | +void print_ggml_tensor(const char* name, struct ggml_tensor* src) { |
| 134 | + if (!g_ggml_sycl_debug) |
| 135 | + return; |
| 136 | + if (!src) { |
| 137 | + printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
| 138 | + return; |
| 139 | + } |
| 140 | + |
| 141 | + size_t total_elements = ggml_nelements(src); |
| 142 | + |
| 143 | + const bool src_on_device = src->backend == GGML_BACKEND_TYPE_GPU || |
| 144 | + src->backend == GGML_BACKEND_TYPE_GPU_SPLIT; |
| 145 | + float* src_data = NULL; |
| 146 | + if (src_on_device) { |
| 147 | + ggml_tensor_extra_gpu* src_extra = (ggml_tensor_extra_gpu*)src->extra; |
| 148 | + src_data = (float*)src_extra->data_device[g_main_device]; |
| 149 | + } else { |
| 150 | + src_data = (float*)src->data; |
| 151 | + } |
| 152 | + |
| 153 | + log_ggml_var_device(name, src_data, total_elements, src_on_device); |
134 | 154 | } |
135 | 155 |
|
136 | | -void log_tensor_with_cnt(const char* name, struct ggml_tensor * src, int stop_cnt) { |
137 | | - stop_cnt = 4; |
138 | | - if(log_file_name_idx>=stop_cnt) return; |
139 | | - char filename[1280]; |
140 | | - sprintf(filename, "%s_%07d", name, log_file_name_idx); |
141 | | - log_file_name_idx++; |
142 | | - print_ggml_tensor(filename, src); |
| 156 | +void log_tensor_with_cnt( |
| 157 | + const char* name, |
| 158 | + struct ggml_tensor* src, |
| 159 | + int stop_cnt) { |
| 160 | + stop_cnt = 4; |
| 161 | + if (log_file_name_idx >= stop_cnt) |
| 162 | + return; |
| 163 | + char filename[1280]; |
| 164 | + sprintf(filename, "%s_%07d", name, log_file_name_idx); |
| 165 | + log_file_name_idx++; |
| 166 | + print_ggml_tensor(filename, src); |
143 | 167 | } |
144 | 168 |
|
145 | | -void *ggml_sycl_host_malloc(size_t size) try { |
146 | | - if (getenv("GGML_SYCL_NO_PINNED") != nullptr) { |
147 | | - return nullptr; |
148 | | - } |
149 | | - |
150 | | - void * ptr = nullptr; |
151 | | - //allow to use dpct::get_in_order_queue() for host malloc |
152 | | - dpct::err0 err = CHECK_TRY_ERROR( |
153 | | - ptr = (void *)sycl::malloc_host(size, dpct::get_in_order_queue())); |
154 | | - |
155 | | - if (err != 0) { |
156 | | - // clear the error |
157 | | - fprintf( |
158 | | - stderr, |
159 | | - "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", |
160 | | - size / 1024.0 / 1024.0, |
161 | | - "syclGetErrorString is not supported"); |
162 | | - return nullptr; |
163 | | - } |
164 | | - |
165 | | - return ptr; |
166 | | -} |
167 | | -catch (sycl::exception const &exc) { |
| 169 | +void* ggml_sycl_host_malloc(size_t size) try { |
| 170 | + if (getenv("GGML_SYCL_NO_PINNED") != nullptr) { |
| 171 | + return nullptr; |
| 172 | + } |
| 173 | + |
| 174 | + void* ptr = nullptr; |
| 175 | + // allow to use dpct::get_in_order_queue() for host malloc |
| 176 | + dpct::err0 err = CHECK_TRY_ERROR( |
| 177 | + ptr = (void*)sycl::malloc_host(size, dpct::get_in_order_queue())); |
| 178 | + |
| 179 | + if (err != 0) { |
| 180 | + // clear the error |
| 181 | + fprintf( |
| 182 | + stderr, |
| 183 | + "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", |
| 184 | + size / 1024.0 / 1024.0, |
| 185 | + "syclGetErrorString is not supported"); |
| 186 | + return nullptr; |
| 187 | + } |
| 188 | + |
| 189 | + return ptr; |
| 190 | +} catch (sycl::exception const& exc) { |
168 | 191 | std::cerr << exc.what() << "Exception caught at file:" << __FILE__ |
169 | 192 | << ", line:" << __LINE__ << std::endl; |
170 | 193 | std::exit(1); |
171 | 194 | } |
172 | 195 |
|
173 | | -void ggml_sycl_host_free(void *ptr) try { |
174 | | - //allow to use dpct::get_in_order_queue() for host malloc |
175 | | - SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, dpct::get_in_order_queue()))); |
176 | | -} |
177 | | -catch (sycl::exception const &exc) { |
| 196 | +void ggml_sycl_host_free(void* ptr) try { |
| 197 | + // allow to use dpct::get_in_order_queue() for host malloc |
| 198 | + SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, dpct::get_in_order_queue()))); |
| 199 | +} catch (sycl::exception const& exc) { |
178 | 200 | std::cerr << exc.what() << "Exception caught at file:" << __FILE__ |
179 | 201 | << ", line:" << __LINE__ << std::endl; |
180 | 202 | std::exit(1); |
|
0 commit comments