@@ -16019,11 +16019,11 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(
16019
16019
static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
16020
16020
const ggml_tensor *src1,
16021
16021
ggml_tensor *dst) try {
16022
- GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT &&
16023
- "mul_mat_id does not support split buffers");
16022
+ GGML_ASSERT(!ggml_backend_buffer_is_sycl_split( src0->buffer) && "mul_mat_id does not support split buffers");
16023
+
16024
16024
const ggml_tensor *ids = dst->src[2];
16025
16025
GGML_TENSOR_BINARY_OP_LOCALS
16026
- GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
16026
+
16027
16027
const dpct::queue_ptr stream = g_syclStreams[g_main_device][0];
16028
16028
16029
16029
const int64_t n_as = ne02;
@@ -16142,13 +16142,13 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
16142
16142
sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne10, 768u));
16143
16143
sycl::range<3> grid_dims(1, n_ids, ids->ne[1]);
16144
16144
stream->submit([&](sycl::handler &cgh) {
16145
- sycl::local_accessor<int, 0> src1_row_acc_ct1 (cgh);
16145
+ sycl::local_accessor<int, 0> src1_row_acc (cgh);
16146
16146
16147
- char *__restrict src1_contiguous_get_ct1 =
16147
+ char *__restrict src1_contiguous_get =
16148
16148
src1_contiguous.get();
16149
- int *__restrict dev_cur_src1_row_get_ct2 =
16149
+ int *__restrict dev_cur_src1_row_get =
16150
16150
dev_cur_src1_row.get();
16151
- mmid_row_mapping *__restrict dev_row_mapping_get_ct3 =
16151
+ mmid_row_mapping *__restrict dev_row_mapping_get =
16152
16152
dev_row_mapping.get();
16153
16153
size_t ids_nb_ct6 = ids->nb[1];
16154
16154
size_t ids_nb_ct7 = ids->nb[0];
@@ -16157,11 +16157,11 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
16157
16157
sycl::nd_range<3>(grid_dims * block_dims, block_dims),
16158
16158
[=](sycl::nd_item<3> item_ct1) {
16159
16159
k_copy_src1_to_contiguous(
16160
- src1_original, src1_contiguous_get_ct1 ,
16161
- dev_cur_src1_row_get_ct2 ,
16162
- dev_row_mapping_get_ct3 , ids_dev, i02,
16160
+ src1_original, src1_contiguous_get ,
16161
+ dev_cur_src1_row_get ,
16162
+ dev_row_mapping_get , ids_dev, i02,
16163
16163
ids_nb_ct6, ids_nb_ct7, ne11, ne10, nb11, nb12,
16164
- item_ct1, src1_row_acc_ct1 );
16164
+ item_ct1, src1_row_acc );
16165
16165
});
16166
16166
});
16167
16167
}
@@ -16187,25 +16187,22 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
16187
16187
sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne0, 768u));
16188
16188
sycl::range<3> grid_dims(1, 1, num_src1_rows);
16189
16189
stream->submit([&](sycl::handler &cgh) {
16190
- const char *__restrict dst_contiguous_get_ct1 =
16190
+ const char *__restrict dst_contiguous_get =
16191
16191
dst_contiguous.get();
16192
- const mmid_row_mapping *__restrict dev_row_mapping_get_ct2 =
16192
+ const mmid_row_mapping *__restrict dev_row_mapping_get =
16193
16193
dev_row_mapping.get();
16194
16194
16195
16195
cgh.parallel_for(
16196
16196
sycl::nd_range<3>(grid_dims * block_dims, block_dims),
16197
16197
[=](sycl::nd_item<3> item_ct1) {
16198
16198
k_copy_dst_from_contiguous(dst_original,
16199
- dst_contiguous_get_ct1 ,
16200
- dev_row_mapping_get_ct2 ,
16199
+ dst_contiguous_get ,
16200
+ dev_row_mapping_get ,
16201
16201
ne0, nb1, nb2, item_ct1);
16202
16202
});
16203
16203
});
16204
16204
}
16205
16205
}
16206
- if (dst->backend == GGML_BACKEND_TYPE_CPU) {
16207
- SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
16208
- }
16209
16206
}
16210
16207
}
16211
16208
catch (sycl::exception const &exc) {
0 commit comments