|
12 | 12 |
|
13 | 13 | #include "common.hpp"
|
14 | 14 |
|
15 |
| -int get_main_device(){ |
16 |
| - return g_main_device; |
| 15 | +int get_main_device() { |
| 16 | + return g_main_device; |
17 | 17 | }
|
18 | 18 |
|
19 | 19 | void check_allow_gpu_index(const int device_index) {
|
20 |
| - if (device_index >= g_device_count) { |
21 |
| - char error_buf[256]; |
22 |
| - snprintf(error_buf, sizeof(error_buf), |
23 |
| - "%s error: device_index:%d is out of range: [0-%d]", __func__, |
24 |
| - device_index, g_device_count - 1); |
25 |
| - fprintf(stderr, "%s\n", error_buf); |
26 |
| - assert(false); |
27 |
| - } |
| 20 | + if (device_index >= g_device_count) { |
| 21 | + char error_buf[256]; |
| 22 | + snprintf( |
| 23 | + error_buf, |
| 24 | + sizeof(error_buf), |
| 25 | + "%s error: device_index:%d is out of range: [0-%d]", |
| 26 | + __func__, |
| 27 | + device_index, |
| 28 | + g_device_count - 1); |
| 29 | + fprintf(stderr, "%s\n", error_buf); |
| 30 | + assert(false); |
| 31 | + } |
28 | 32 | }
|
29 | 33 |
|
30 | 34 | void check_allow_gpu_id(const int device_id) {
|
31 |
| - if (!g_sycl_gpu_mgr->is_allowed_gpu(device_id)) { |
32 |
| - char error_buf[256]; |
33 |
| - snprintf(error_buf, sizeof(error_buf), |
34 |
| - "error: cannot set device=%d, which is not allowed. Please " |
35 |
| - "set GPU ID in: [%s]", |
36 |
| - device_id, g_sycl_gpu_mgr->gpus_list.c_str()); |
37 |
| - fprintf(stderr, "%s\n", error_buf); |
38 |
| - throw std::invalid_argument(error_buf); |
39 |
| - } |
| 35 | + if (!g_sycl_gpu_mgr->is_allowed_gpu(device_id)) { |
| 36 | + char error_buf[256]; |
| 37 | + snprintf( |
| 38 | + error_buf, |
| 39 | + sizeof(error_buf), |
| 40 | + "error: cannot set device=%d, which is not allowed. Please " |
| 41 | + "set GPU ID in: [%s]", |
| 42 | + device_id, |
| 43 | + g_sycl_gpu_mgr->gpus_list.c_str()); |
| 44 | + fprintf(stderr, "%s\n", error_buf); |
| 45 | + throw std::invalid_argument(error_buf); |
| 46 | + } |
40 | 47 | }
|
41 | 48 |
|
42 | 49 | int get_current_device_id() {
|
43 |
| - return dpct::dev_mgr::instance().current_device_id(); |
| 50 | + return dpct::dev_mgr::instance().current_device_id(); |
44 | 51 | }
|
45 | 52 |
|
46 |
| -void log_ggml_var_device(const char*name, float *src, size_t total_elements, bool src_on_device){ |
47 |
| - if(!g_ggml_sycl_debug) return; |
48 |
| - if(!src){ |
49 |
| - printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
50 |
| - return; |
51 |
| - } |
52 |
| - char filename[1024]; |
53 |
| - sprintf(filename, "%s.txt", name); |
54 |
| - printf("GGML Tensor:%s save to %s\n", name, filename); |
55 |
| - |
56 |
| - size_t total_size = total_elements*sizeof(float); |
57 |
| - float *local_buf = NULL; |
58 |
| - if(src_on_device) { |
59 |
| - local_buf = (float *) ggml_sycl_host_malloc(total_size); |
60 |
| - ggml_sycl_set_device(g_main_device); |
61 |
| - dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
62 |
| - main_stream->memcpy(local_buf, src, total_size).wait(); |
63 |
| - } |
64 |
| - else { |
65 |
| - local_buf = (float *)src; |
66 |
| - } |
67 |
| - |
68 |
| - std::ofstream logfile; |
69 |
| - logfile.open(filename); |
70 |
| - for(size_t i=0; i<total_elements; i++){ |
71 |
| - logfile << local_buf[i] <<" "; |
72 |
| - if((i+1)%20 ==0) logfile <<std::endl; |
73 |
| - } |
74 |
| - logfile <<std::endl; |
75 |
| - logfile.close(); |
76 |
| - |
77 |
| - if(src_on_device) ggml_sycl_host_free(local_buf); |
| 53 | +void log_ggml_var_device( |
| 54 | + const char* name, |
| 55 | + float* src, |
| 56 | + size_t total_elements, |
| 57 | + bool src_on_device) { |
| 58 | + if (!g_ggml_sycl_debug) |
| 59 | + return; |
| 60 | + if (!src) { |
| 61 | + printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
| 62 | + return; |
| 63 | + } |
| 64 | + char filename[1024]; |
| 65 | + sprintf(filename, "%s.txt", name); |
| 66 | + printf("GGML Tensor:%s save to %s\n", name, filename); |
| 67 | + |
| 68 | + size_t total_size = total_elements * sizeof(float); |
| 69 | + float* local_buf = NULL; |
| 70 | + if (src_on_device) { |
| 71 | + local_buf = (float*)ggml_sycl_host_malloc(total_size); |
| 72 | + ggml_sycl_set_device(g_main_device); |
| 73 | + dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
| 74 | + main_stream->memcpy(local_buf, src, total_size).wait(); |
| 75 | + } else { |
| 76 | + local_buf = (float*)src; |
| 77 | + } |
| 78 | + |
| 79 | + std::ofstream logfile; |
| 80 | + logfile.open(filename); |
| 81 | + for (size_t i = 0; i < total_elements; i++) { |
| 82 | + logfile << local_buf[i] << " "; |
| 83 | + if ((i + 1) % 20 == 0) |
| 84 | + logfile << std::endl; |
| 85 | + } |
| 86 | + logfile << std::endl; |
| 87 | + logfile.close(); |
| 88 | + |
| 89 | + if (src_on_device) |
| 90 | + ggml_sycl_host_free(local_buf); |
78 | 91 | }
|
79 | 92 |
|
80 |
| -void log_ggml_var_device_fp16(const char*name, sycl::half *src, size_t total_elements, bool src_on_device){ |
81 |
| - if(!g_ggml_sycl_debug) return; |
82 |
| - if(!src){ |
83 |
| - printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
84 |
| - return; |
85 |
| - } |
86 |
| - char filename[1024]; |
87 |
| - sprintf(filename, "%s.txt", name); |
88 |
| - printf("GGML Tensor:%s save to %s\n", name, filename); |
89 |
| - |
90 |
| - size_t total_size = total_elements*sizeof(sycl::half); |
91 |
| - sycl::half *local_buf = NULL; |
92 |
| - if(src_on_device) { |
93 |
| - local_buf = (sycl::half *) ggml_sycl_host_malloc(total_size); |
94 |
| - ggml_sycl_set_device(g_main_device); |
95 |
| - dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
96 |
| - main_stream->memcpy(local_buf, src, total_size).wait(); |
97 |
| - } |
98 |
| - else { |
99 |
| - local_buf = (sycl::half *)src; |
100 |
| - } |
101 |
| - |
102 |
| - std::ofstream logfile; |
103 |
| - logfile.open(filename); |
104 |
| - for(size_t i=0; i<total_elements; i++){ |
105 |
| - logfile << local_buf[i] <<" "; |
106 |
| - if((i+1)%20 ==0) logfile <<std::endl; |
107 |
| - } |
108 |
| - logfile <<std::endl; |
109 |
| - logfile.close(); |
110 |
| - |
111 |
| - if(src_on_device) ggml_sycl_host_free(local_buf); |
| 93 | +void log_ggml_var_device_fp16( |
| 94 | + const char* name, |
| 95 | + sycl::half* src, |
| 96 | + size_t total_elements, |
| 97 | + bool src_on_device) { |
| 98 | + if (!g_ggml_sycl_debug) |
| 99 | + return; |
| 100 | + if (!src) { |
| 101 | + printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
| 102 | + return; |
| 103 | + } |
| 104 | + char filename[1024]; |
| 105 | + sprintf(filename, "%s.txt", name); |
| 106 | + printf("GGML Tensor:%s save to %s\n", name, filename); |
| 107 | + |
| 108 | + size_t total_size = total_elements * sizeof(sycl::half); |
| 109 | + sycl::half* local_buf = NULL; |
| 110 | + if (src_on_device) { |
| 111 | + local_buf = (sycl::half*)ggml_sycl_host_malloc(total_size); |
| 112 | + ggml_sycl_set_device(g_main_device); |
| 113 | + dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0]; |
| 114 | + main_stream->memcpy(local_buf, src, total_size).wait(); |
| 115 | + } else { |
| 116 | + local_buf = (sycl::half*)src; |
| 117 | + } |
| 118 | + |
| 119 | + std::ofstream logfile; |
| 120 | + logfile.open(filename); |
| 121 | + for (size_t i = 0; i < total_elements; i++) { |
| 122 | + logfile << local_buf[i] << " "; |
| 123 | + if ((i + 1) % 20 == 0) |
| 124 | + logfile << std::endl; |
| 125 | + } |
| 126 | + logfile << std::endl; |
| 127 | + logfile.close(); |
| 128 | + |
| 129 | + if (src_on_device) |
| 130 | + ggml_sycl_host_free(local_buf); |
112 | 131 | }
|
113 | 132 |
|
114 |
| -void print_ggml_tensor(const char*name, struct ggml_tensor *src){ |
115 |
| - if(!g_ggml_sycl_debug) return; |
116 |
| - if(!src){ |
117 |
| - printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
118 |
| - return; |
119 |
| - } |
120 |
| - |
121 |
| - size_t total_elements = ggml_nelements(src); |
122 |
| - |
123 |
| - const bool src_on_device = src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT; |
124 |
| - float *src_data =NULL; |
125 |
| - if(src_on_device) { |
126 |
| - ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra; |
127 |
| - src_data = (float*)src_extra->data_device[g_main_device]; |
128 |
| - } |
129 |
| - else { |
130 |
| - src_data = (float *)src->data; |
131 |
| - } |
132 |
| - |
133 |
| - log_ggml_var_device(name, src_data, total_elements, src_on_device); |
| 133 | +void print_ggml_tensor(const char* name, struct ggml_tensor* src) { |
| 134 | + if (!g_ggml_sycl_debug) |
| 135 | + return; |
| 136 | + if (!src) { |
| 137 | + printf("GGML Tensor:%s skip to save for NULL pointer\n", name); |
| 138 | + return; |
| 139 | + } |
| 140 | + |
| 141 | + size_t total_elements = ggml_nelements(src); |
| 142 | + |
| 143 | + const bool src_on_device = src->backend == GGML_BACKEND_TYPE_GPU || |
| 144 | + src->backend == GGML_BACKEND_TYPE_GPU_SPLIT; |
| 145 | + float* src_data = NULL; |
| 146 | + if (src_on_device) { |
| 147 | + ggml_tensor_extra_gpu* src_extra = (ggml_tensor_extra_gpu*)src->extra; |
| 148 | + src_data = (float*)src_extra->data_device[g_main_device]; |
| 149 | + } else { |
| 150 | + src_data = (float*)src->data; |
| 151 | + } |
| 152 | + |
| 153 | + log_ggml_var_device(name, src_data, total_elements, src_on_device); |
134 | 154 | }
|
135 | 155 |
|
136 |
| -void log_tensor_with_cnt(const char* name, struct ggml_tensor * src, int stop_cnt) { |
137 |
| - stop_cnt = 4; |
138 |
| - if(log_file_name_idx>=stop_cnt) return; |
139 |
| - char filename[1280]; |
140 |
| - sprintf(filename, "%s_%07d", name, log_file_name_idx); |
141 |
| - log_file_name_idx++; |
142 |
| - print_ggml_tensor(filename, src); |
| 156 | +void log_tensor_with_cnt( |
| 157 | + const char* name, |
| 158 | + struct ggml_tensor* src, |
| 159 | + int stop_cnt) { |
| 160 | + stop_cnt = 4; |
| 161 | + if (log_file_name_idx >= stop_cnt) |
| 162 | + return; |
| 163 | + char filename[1280]; |
| 164 | + sprintf(filename, "%s_%07d", name, log_file_name_idx); |
| 165 | + log_file_name_idx++; |
| 166 | + print_ggml_tensor(filename, src); |
143 | 167 | }
|
144 | 168 |
|
145 |
| -void *ggml_sycl_host_malloc(size_t size) try { |
146 |
| - if (getenv("GGML_SYCL_NO_PINNED") != nullptr) { |
147 |
| - return nullptr; |
148 |
| - } |
149 |
| - |
150 |
| - void * ptr = nullptr; |
151 |
| - //allow to use dpct::get_in_order_queue() for host malloc |
152 |
| - dpct::err0 err = CHECK_TRY_ERROR( |
153 |
| - ptr = (void *)sycl::malloc_host(size, dpct::get_in_order_queue())); |
154 |
| - |
155 |
| - if (err != 0) { |
156 |
| - // clear the error |
157 |
| - fprintf( |
158 |
| - stderr, |
159 |
| - "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", |
160 |
| - size / 1024.0 / 1024.0, |
161 |
| - "syclGetErrorString is not supported"); |
162 |
| - return nullptr; |
163 |
| - } |
164 |
| - |
165 |
| - return ptr; |
166 |
| -} |
167 |
| -catch (sycl::exception const &exc) { |
| 169 | +void* ggml_sycl_host_malloc(size_t size) try { |
| 170 | + if (getenv("GGML_SYCL_NO_PINNED") != nullptr) { |
| 171 | + return nullptr; |
| 172 | + } |
| 173 | + |
| 174 | + void* ptr = nullptr; |
| 175 | + // allow to use dpct::get_in_order_queue() for host malloc |
| 176 | + dpct::err0 err = CHECK_TRY_ERROR( |
| 177 | + ptr = (void*)sycl::malloc_host(size, dpct::get_in_order_queue())); |
| 178 | + |
| 179 | + if (err != 0) { |
| 180 | + // clear the error |
| 181 | + fprintf( |
| 182 | + stderr, |
| 183 | + "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", |
| 184 | + size / 1024.0 / 1024.0, |
| 185 | + "syclGetErrorString is not supported"); |
| 186 | + return nullptr; |
| 187 | + } |
| 188 | + |
| 189 | + return ptr; |
| 190 | +} catch (sycl::exception const& exc) { |
168 | 191 | std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
169 | 192 | << ", line:" << __LINE__ << std::endl;
|
170 | 193 | std::exit(1);
|
171 | 194 | }
|
172 | 195 |
|
173 |
| -void ggml_sycl_host_free(void *ptr) try { |
174 |
| - //allow to use dpct::get_in_order_queue() for host malloc |
175 |
| - SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, dpct::get_in_order_queue()))); |
176 |
| -} |
177 |
| -catch (sycl::exception const &exc) { |
| 196 | +void ggml_sycl_host_free(void* ptr) try { |
| 197 | + // allow to use dpct::get_in_order_queue() for host malloc |
| 198 | + SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, dpct::get_in_order_queue()))); |
| 199 | +} catch (sycl::exception const& exc) { |
178 | 200 | std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
179 | 201 | << ", line:" << __LINE__ << std::endl;
|
180 | 202 | std::exit(1);
|
|
0 commit comments