Skip to content

Commit 38f09be

Browse files
committed
backup
1 parent 09e3a9e commit 38f09be

File tree

3 files changed

+28
-16
lines changed

3 files changed

+28
-16
lines changed

ggml-sycl/common.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,6 @@ int get_main_device() {
1616
return g_main_device;
1717
}
1818

19-
void check_allow_gpu_index(const int device_index) {
20-
if (device_index >= g_device_count) {
21-
char error_buf[256];
22-
snprintf(
23-
error_buf,
24-
sizeof(error_buf),
25-
"%s error: device_index:%d is out of range: [0-%d]",
26-
__func__,
27-
device_index,
28-
g_device_count - 1);
29-
fprintf(stderr, "%s\n", error_buf);
30-
assert(false);
31-
}
32-
}
33-
3419
int get_current_device_id() {
3520
return dpct::dev_mgr::instance().current_device_id();
3621
}

ggml-sycl/common.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,20 @@ int get_main_device();
369369
device_index: device index from 0 to n (consecutive numbers).
370370
It is used to select/set devices in the SYCL backend's internal data structures.
371371
*/
372-
void check_allow_gpu_index(const int device_index);
372+
inline void check_allow_gpu_index(const int device_index) {
373+
if (device_index >= g_device_count) {
374+
char error_buf[256];
375+
snprintf(
376+
error_buf,
377+
sizeof(error_buf),
378+
"%s error: device_index:%d is out of range: [0-%d]",
379+
__func__,
380+
device_index,
381+
g_device_count - 1);
382+
fprintf(stderr, "%s\n", error_buf);
383+
assert(false);
384+
}
385+
}
373386

374387
/*
375388
device_id: device ID is shown by ggml_backend_sycl_print_sycl_devices().

ggml-sycl/dmmv.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,20 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
952952
const int64_t ne00 = src0->ne[0];
953953
const int64_t row_diff = row_high - row_low;
954954

955+
switch (src1->type)
956+
{
957+
case GGML_TYPE_F32:
958+
printf("f32\n");
959+
break;
960+
case GGML_TYPE_Q4_0:
961+
printf("q4_0\n");
962+
break;
963+
case GGML_TYPE_F16:
964+
printf("f16\n");
965+
break;
966+
default:
967+
break;
968+
}
955969
GGML_ASSERT(src1->type == GGML_TYPE_F32);
956970

957971
// on some GPUs it is faster to convert src1 to half and to use half precision intrinsics

0 commit comments

Comments
 (0)