Skip to content

Commit 8703a13

Browse files
committed
ggml: add names to tensors
1 parent ea3a0ad commit 8703a13

File tree

2 files changed

+60
-18
lines changed

2 files changed

+60
-18
lines changed

ggml.c

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4524,6 +4524,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
45244524
/*.perf_cycles =*/ 0,
45254525
/*.perf_time_us =*/ 0,
45264526
/*.data =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data,
4527+
/*.name =*/ { 0 },
45274528
/*.pad =*/ { 0 },
45284529
};
45294530

@@ -4878,6 +4879,15 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
48784879
return (float *)(tensor->data);
48794880
}
48804881

4882+
const char * ggml_get_name(const struct ggml_tensor * tensor) {
4883+
return tensor->name;
4884+
}
4885+
4886+
// Assign a debug name to a tensor, truncating to the fixed-size internal
// buffer if necessary. The result is always NUL-terminated.
//
// Improvements over the strncpy version:
//  - snprintf guarantees NUL-termination in one step and does not zero-pad
//    the unused tail of the buffer (strncpy is a known anti-pattern here);
//  - a NULL name is tolerated and treated as clearing the name, instead of
//    invoking undefined behavior.
void ggml_set_name(struct ggml_tensor * tensor, const char * name) {
    if (name == NULL) {
        // Defensive: clear the name rather than dereference NULL.
        tensor->name[0] = '\0';
        return;
    }
    snprintf(tensor->name, sizeof(tensor->name), "%s", name);
}
4890+
48814891
struct ggml_tensor * ggml_view_tensor(
48824892
struct ggml_context * ctx,
48834893
const struct ggml_tensor * src) {
@@ -5977,6 +5987,7 @@ struct ggml_tensor * ggml_diag_mask_inf(
59775987
//struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
59785988
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
59795989
struct ggml_tensor * b = ggml_new_i32(ctx, n_past);
5990+
ggml_set_name(b, "n_past");
59805991

59815992
result->op = GGML_OP_DIAG_MASK_INF;
59825993
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6034,6 +6045,7 @@ struct ggml_tensor * ggml_rope(
60346045
((int32_t *) b->data)[0] = n_past;
60356046
((int32_t *) b->data)[1] = n_dims;
60366047
((int32_t *) b->data)[2] = mode;
6048+
ggml_set_name(b, "n_past, n_dims, mode");
60376049

60386050
result->op = GGML_OP_ROPE;
60396051
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -12214,10 +12226,16 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
1221412226
snprintf(color, sizeof(color), "white");
1221512227
}
1221612228

12217-
fprintf(fp, " \"%p\" [ \
12218-
style = filled; fillcolor = %s; shape = record; \
12219-
label=\"%d [%" PRId64 ", %" PRId64 "] | <x>%s",
12220-
(void *) node, color,
12229+
fprintf(fp, " \"%p\" [ "
12230+
"style = filled; fillcolor = %s; shape = record; "
12231+
"label=\"",
12232+
(void *) node, color);
12233+
12234+
if (strlen(node->name) > 0) {
12235+
fprintf(fp, "%s |", node->name);
12236+
}
12237+
12238+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s",
1222112239
i, node->ne[0], node->ne[1],
1222212240
GGML_OP_SYMBOL[node->op]);
1222312241

@@ -12233,18 +12251,26 @@ label=\"%d [%" PRId64 ", %" PRId64 "] | <x>%s",
1223312251

1223412252
snprintf(color, sizeof(color), "pink");
1223512253

12254+
fprintf(fp, " \"%p\" [ "
12255+
"style = filled; fillcolor = %s; shape = record; "
12256+
"label=\"<x>",
12257+
(void *) node, color);
12258+
12259+
if (strlen(node->name) > 0) {
12260+
fprintf(fp, "%s |", node->name);
12261+
}
1223612262
if (ggml_nelements(node) == 1) {
12237-
fprintf(fp, " \"%p\" [ \
12238-
style = filled; fillcolor = %s; shape = record; \
12239-
label=\"<x>%.1e\"; ]\n",
12240-
(void *) node, color, (double)ggml_get_f32_1d(node, 0));
12241-
} else {
12242-
fprintf(fp, " \"%p\" [ \
12243-
style = filled; fillcolor = %s; shape = record; \
12244-
label=\"<x>CONST %d [%" PRId64 ", %" PRId64 "]\"; ]\n",
12245-
(void *) node, color,
12246-
i, node->ne[0], node->ne[1]);
12263+
if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
12264+
fprintf(fp, "%d", ggml_get_i32_1d(node, 0));
12265+
}
12266+
else {
12267+
fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, 0));
12268+
}
12269+
}
12270+
else {
12271+
fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]\n", i, node->ne[0], node->ne[1]);
1224712272
}
12273+
fprintf(fp, "\"; ]\n");
1224812274
}
1224912275

1225012276
for (int i = 0; i < gb->n_nodes; i++) {

llama.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,7 @@ struct llama_model_loader {
659659
LLAMA_ASSERT(lt.ne.size() == 1);
660660
tensor = ggml_new_tensor_1d(ggml_ctx, lt.type, lt.ne.at(0));
661661
}
662+
ggml_set_name(tensor, lt.name.c_str());
662663
LLAMA_ASSERT(lt.ggml_tensor == NULL); // if this fails, we called get_tensor twice on the same tensor
663664
lt.ggml_tensor = tensor;
664665
num_ggml_tensors_created++;
@@ -798,6 +799,8 @@ static bool kv_cache_init(
798799

799800
cache.k = ggml_new_tensor_1d(cache.ctx, wtype, n_elements);
800801
cache.v = ggml_new_tensor_1d(cache.ctx, wtype, n_elements);
802+
ggml_set_name(cache.k, "cache_k");
803+
ggml_set_name(cache.v, "cache_v");
801804

802805
return true;
803806
}
@@ -1084,6 +1087,7 @@ static bool llama_eval_internal(
10841087
gf.n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
10851088

10861089
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
1090+
ggml_set_name(embd, "embd");
10871091
memcpy(embd->data, tokens, N*ggml_element_size(embd));
10881092

10891093
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
@@ -1110,6 +1114,8 @@ static bool llama_eval_internal(
11101114
// compute Q and K and RoPE them
11111115
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
11121116
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
1117+
ggml_set_name(Qcur, "Qcur");
1118+
ggml_set_name(Kcur, "Kcur");
11131119

11141120
// store key and value to memory
11151121
{
@@ -1130,28 +1136,34 @@ static bool llama_eval_internal(
11301136
ggml_permute(ctx0,
11311137
Qcur,
11321138
0, 2, 1, 3);
1139+
ggml_set_name(Q, "Q");
11331140

11341141
struct ggml_tensor * K =
11351142
ggml_permute(ctx0,
11361143
ggml_reshape_3d(ctx0,
11371144
ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(kv_self.k)*n_embd),
11381145
n_embd/n_head, n_head, n_past + N),
11391146
0, 2, 1, 3);
1147+
ggml_set_name(K, "K");
11401148

11411149
// K * Q
11421150
struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
1151+
ggml_set_name(KQ, "KQ");
11431152

11441153
// KQ_scaled = KQ / sqrt(n_embd/n_head)
1145-
struct ggml_tensor * KQ_scaled =
1146-
ggml_scale(ctx0,
1147-
KQ,
1148-
ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
1154+
struct ggml_tensor * KQ_scale = ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head));
1155+
ggml_set_name(KQ_scale, "1/sqrt(n_embd/n_head)");
1156+
1157+
struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, KQ_scale);
1158+
ggml_set_name(KQ_scaled, "KQ_scaled");
11491159

11501160
// KQ_masked = mask_past(KQ_scaled)
11511161
struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past);
1162+
ggml_set_name(KQ_masked, "KQ_masked");
11521163

11531164
// KQ = soft_max(KQ_masked)
11541165
struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked);
1166+
ggml_set_name(KQ_soft_max, "KQ_soft_max");
11551167

11561168
// split cached V into n_head heads
11571169
struct ggml_tensor * V =
@@ -1160,9 +1172,11 @@ static bool llama_eval_internal(
11601172
n_ctx*ggml_element_size(kv_self.v),
11611173
n_ctx*ggml_element_size(kv_self.v)*n_embd/n_head,
11621174
il*n_ctx*ggml_element_size(kv_self.v)*n_embd);
1175+
ggml_set_name(V, "V");
11631176

11641177
#if 1
11651178
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
1179+
ggml_set_name(KQV, "KQV");
11661180
#else
11671181
// make V contiguous in memory to speed up the matmul, however we waste time on the copy
11681182
// on M1 this is faster for the perplexity computation, but ~5% slower for the single-token generation
@@ -1173,11 +1187,13 @@ static bool llama_eval_internal(
11731187

11741188
// KQV_merged = KQV.permute(0, 2, 1, 3)
11751189
struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
1190+
ggml_set_name(KQV_merged, "KQV_merged");
11761191

11771192
// cur = KQV_merged.contiguous().view(n_embd, N)
11781193
cur = ggml_cpy(ctx0,
11791194
KQV_merged,
11801195
ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
1196+
ggml_set_name(cur, "KQV_merged_contiguous");
11811197

11821198
// projection (no bias)
11831199
cur = ggml_mul_mat(ctx0,

0 commit comments

Comments
 (0)