
Commit 4daa5ee

wip : avoid inplace ops

1 parent 5ee92c3

3 files changed: 16 additions & 10 deletions

ggml-backend.c

Lines changed: 6 additions & 3 deletions

@@ -135,8 +135,8 @@ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml
 }

 void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
-    //printf("src: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", src->name, (int)src->ne[0], (int)src->ne[1], (int)src->ne[2], (int)src->ne[3], (int)src->nb[0], (int)src->nb[1], (int)src->nb[2], (int)src->nb[3]);
-    //printf("dst: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], (int)dst->nb[0], (int)dst->nb[1], (int)dst->nb[2], (int)dst->nb[3]);
+    printf("src: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", src->name, (int)src->ne[0], (int)src->ne[1], (int)src->ne[2], (int)src->ne[3], (int)src->nb[0], (int)src->nb[1], (int)src->nb[2], (int)src->nb[3]);
+    printf("dst: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], (int)dst->nb[0], (int)dst->nb[1], (int)dst->nb[2], (int)dst->nb[3]);
     GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");

     // printf("cpy tensor %s from %s to %s (%lu bytes)\n", src->name, ggml_backend_name(src->backend), ggml_backend_name(dst->backend), ggml_nbytes(src));

@@ -145,6 +145,9 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst
         return;
     }

+    printf("src->data = %p, src->extra = %p\n", src->data, src->extra);
+    printf("dst->data = %p, dst->extra = %p\n", dst->data, dst->extra);
+
     if (dst->backend->interface.cpy_tensor_from != NULL) {
         dst->backend->interface.cpy_tensor_from(dst->backend->context, src, dst);
     } else if (src->backend->interface.cpy_tensor_to != NULL) {

@@ -464,7 +467,7 @@ void ggml_graph_splits_compute(struct ggml_graph_splits * splits) {
         // copy the input tensor to the backend
         uint64_t copy_start_us = ggml_time_us();
         for (int j = 0; split->src_inputs[j] != NULL; j++) {
-            //printf("\tcopying tensor %d (%s) (%lu bytes)\n", j, split->src_inputs[j]->name, ggml_nbytes(split->src_inputs[j]));
+            printf("\tcopying tensor %d (%s) (%lu bytes)\n", j, split->src_inputs[j]->name, ggml_nbytes(split->src_inputs[j]));
             ggml_backend_tensor_copy(split->src_inputs[j], split->dst_inputs[j]);
         }
         // ggml_backend_synchronize(split->dst_inputs[0]->backend);
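
Note: the dispatch order exercised by the new logging is visible in the second hunk: prefer the destination backend's cpy_tensor_from, fall back to the source backend's cpy_tensor_to. Below is a minimal standalone C sketch of that order; the vtable type and the host-memory bounce at the end are illustrative stand-ins, not ggml's exact interface.

#include <stdio.h>
#include <string.h>

// Stand-in for a backend vtable; the real ggml interface has more members.
typedef void (*cpy_fn)(void * ctx, const char * src, char * dst, size_t n);

struct backend {
    void * context;
    cpy_fn cpy_tensor_from; // destination pulls from the source backend
    cpy_fn cpy_tensor_to;   // source pushes into the destination backend
};

// Same preference order as ggml_backend_tensor_copy above:
// cpy_tensor_from first, then cpy_tensor_to; the memcpy fallback is an
// assumption added here so the sketch is self-contained.
static void tensor_copy(struct backend * src_b, struct backend * dst_b,
                        const char * src, char * dst, size_t n) {
    if (dst_b->cpy_tensor_from != NULL) {
        dst_b->cpy_tensor_from(dst_b->context, src, dst, n);
    } else if (src_b->cpy_tensor_to != NULL) {
        src_b->cpy_tensor_to(src_b->context, src, dst, n);
    } else {
        memcpy(dst, src, n); // assumed fallback through host memory
    }
}

int main(void) {
    struct backend cpu = {0}, metal = {0};
    char src[6] = "hello", dst[6] = {0};
    tensor_copy(&cpu, &metal, src, dst, sizeof src);
    printf("%s\n", dst); // both vtables empty -> memcpy fallback
    return 0;
}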

ggml-metal.m

Lines changed: 8 additions & 5 deletions

@@ -20,6 +20,8 @@
     id<MTLBuffer> buffer;
 };

+static void * g_ptr_base = (void *)0x1000;
+
 struct ggml_metal_context {
     int n_cb;

@@ -222,8 +224,8 @@ void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
         default: {}
     }

-    *offs = (size_t) tensor->data;
-    printf("%s: offs = %zu\n", __func__, *offs);
+    *offs = (size_t) tensor->data - (size_t) g_ptr_base;
+    printf("%s: offs = %zu, %p\n", __func__, *offs, tensor->extra);
     return ((struct ggml_metal_buffer_wrapper *) tensor->extra)->buffer;
 }

@@ -917,7 +919,7 @@ static void ggml_backend_metal_free_data(struct ggml_backend_buffer * alloc) {

     printf("XXXXXXXXXXXXXXX ALOC: %p %p %p size = %zu\n", (void *)wrapper, (void *)&wrapper->buffer, (void *)[wrapper->buffer contents], size);

-    struct ggml_backend_buffer * buffer = ggml_allocator_simple_init(nil, size, TENSOR_ALIGNMENT);
+    struct ggml_backend_buffer * buffer = ggml_allocator_simple_init(g_ptr_base, size, TENSOR_ALIGNMENT);
     buffer->interface.init_tensor = ggml_backend_metal_init_tensor;
     buffer->interface.free_data = ggml_backend_metal_free_data;
     buffer->backend_data = wrapper;

@@ -932,7 +934,7 @@ static void ggml_backend_metal_set_tensor_async(struct ggml_backend * backend, s
     struct ggml_metal_buffer_wrapper * wrapper = (struct ggml_metal_buffer_wrapper *)tensor->extra;
     char * contents = (char *)[wrapper->buffer contents];

-    const size_t t_data = (size_t) tensor->data;
+    const size_t t_data = (size_t) tensor->data - (size_t) g_ptr_base;

     printf("XXXXXXXXXXXXXXX SET : %p %p %p offset = %zu\n", (void *)(tensor->data), (void *)&wrapper->buffer, (void *)contents, offset);

@@ -945,12 +947,13 @@

 static void ggml_backend_metal_get_tensor_async(struct ggml_backend * backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
+    printf("XXXXXXXXXXXXXXX GET : %p %p, backend = %s\n", (void *)(tensor->data), (void *)tensor->extra, tensor->backend->interface.get_name(tensor->backend));
     GGML_ASSERT(tensor->extra != nil && "tensor not allocated");

     struct ggml_metal_buffer_wrapper * wrapper = (struct ggml_metal_buffer_wrapper *)tensor->extra;
     char * contents = (char *)[wrapper->buffer contents];

-    const size_t t_data = (size_t) tensor->data;
+    const size_t t_data = (size_t) tensor->data - (size_t) g_ptr_base;

     printf("XXXXXXXXXXXXXXX GET : %p %p %p offset = %zu\n", (void *)(tensor->data), (void *)&wrapper->buffer, (void *)contents, offset);
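
Note: the g_ptr_base change is the core of this file's diff. For Metal tensors, tensor->data no longer holds a host address: the simple allocator now hands out addresses rebased at 0x1000 (presumably so that a tensor at buffer offset 0 does not end up with a NULL data pointer), and every access path recovers the offset into the MTLBuffer by subtracting the base. A minimal plain-C sketch of the arithmetic, with the 256-byte offset as an illustrative assumption:

#include <stdio.h>

// Same fake base as the diff; tensor "addresses" start here instead of 0.
static void * g_ptr_base = (void *)0x1000;

int main(void) {
    // assume the allocator placed a tensor 256 bytes into the Metal buffer
    void * tensor_data = (char *)g_ptr_base + 256;

    // recover the MTLBuffer offset exactly as the *offs computation above
    size_t offs = (size_t)tensor_data - (size_t)g_ptr_base;
    printf("offs = %zu\n", offs); // prints 256
    return 0;
}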

llama.cpp

Lines changed: 2 additions & 2 deletions

@@ -1370,10 +1370,10 @@ static ggml_graph_splits llama_build_graph(
     struct ggml_tensor * tmpv = ggml_mul_mat(ctx_l, model.layers[il].wv, cur);
     ggml_set_name(tmpv, "tmpv");

-    struct ggml_tensor * Kcur = ggml_rope_custom_inplace(ctx_l, ggml_reshape_3d(ctx_l, tmpk, n_embd/n_head, n_head, N), n_past, n_rot, 0, freq_base, freq_scale, 0);
+    struct ggml_tensor * Kcur = ggml_rope(ctx_l, ggml_reshape_3d(ctx_l, tmpk, n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
     ggml_set_name(Kcur, "Kcur");

-    struct ggml_tensor * Qcur = ggml_rope_custom_inplace(ctx_l, ggml_reshape_3d(ctx_l, tmpq, n_embd/n_head, n_head, N), n_past, n_rot, 0, freq_base, freq_scale, 0);
+    struct ggml_tensor * Qcur = ggml_rope(ctx_l, ggml_reshape_3d(ctx_l, tmpq, n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
     ggml_set_name(Qcur, "Qcur");

     struct ggml_tensor * Vcur = ggml_transpose(ctx_l, ggml_reshape_2d(ctx_l, tmpv, n_embd, N));
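
Note: this swap from ggml_rope_custom_inplace to ggml_rope is what the commit title refers to. An inplace op writes its result into the source tensor's buffer, so source and result are pinned to the same allocation, while the non-inplace variant gives the result its own tensor, presumably what the backend-split allocator needs here. (The custom freq_base/freq_scale arguments are dropped in the process.) A plain-C illustration of the aliasing difference, with a negation standing in for the actual rotation:

#include <stdio.h>

// Inplace: the result aliases the input, so input and result must share
// one buffer (and one backend).
static void op_inplace(float * x, int n) {
    for (int i = 0; i < n; i++) x[i] = -x[i]; // stand-in for the rope rotation
}

// Non-inplace: the result is written elsewhere; the planner is free to put
// x and y in different buffers, and x survives for other consumers.
static void op(const float * x, float * y, int n) {
    for (int i = 0; i < n; i++) y[i] = -x[i];
}

int main(void) {
    float k[4]    = {1, 2, 3, 4};
    float kcur[4] = {0};
    op(k, kcur, 4);   // k is untouched, like tmpk after ggml_rope
    printf("k[0] = %g, kcur[0] = %g\n", k[0], kcur[0]);
    op_inplace(k, 4); // k itself is overwritten, like the old _inplace path
    printf("k[0] = %g\n", k[0]);
    return 0;
}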
