Commit 5ee8597

ochafik authored and committed
skip-unused: disable skipping on ROCm / when LLAMA_USE_HIPBLAS
1 parent: c77ed60

File tree

1 file changed (+14, -1 lines)


llama.cpp

Lines changed: 14 additions & 1 deletion
@@ -32,6 +32,12 @@
 #endif
 #endif
 
+// TODO: Fix unused logit skipping crashes on ROCm
+// (see https://github.com/ggerganov/llama.cpp/pull/2700#issuecomment-1689548127)
+#ifndef LLAMA_USE_HIPBLAS
+#define LLAMA_SKIP_UNUSED_LOGITS
+#endif
+
 #include <array>
 #include <ctime>
 #include <cinttypes>
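
In effect, the fast path becomes opt-out at compile time: LLAMA_SKIP_UNUSED_LOGITS is defined by default and suppressed whenever the ROCm/hipBLAS build defines LLAMA_USE_HIPBLAS. A minimal standalone sketch of the gate (main() and the printed strings are illustrative, not part of llama.cpp):

    #include <cstdio>

    // #define LLAMA_USE_HIPBLAS   // uncomment to emulate a ROCm/hipBLAS build

    #ifndef LLAMA_USE_HIPBLAS
    #define LLAMA_SKIP_UNUSED_LOGITS
    #endif

    int main() {
    #ifdef LLAMA_SKIP_UNUSED_LOGITS
        std::puts("LLAMA_SKIP_UNUSED_LOGITS on: compute logits for the last token only");
    #else
        std::puts("LLAMA_SKIP_UNUSED_LOGITS off: compute logits for every token");
    #endif
    }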
@@ -1594,6 +1600,7 @@ static struct ggml_cgraph * llama_build_graph(
             ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
         }
 
+#ifdef LLAMA_SKIP_UNUSED_LOGITS
         if (il == n_layer - 1 && !lctx.logits_all)
         {
             // From here on, we only care about the last token and its logits.
@@ -1614,6 +1621,7 @@ static struct ggml_cgraph * llama_build_graph(
             n_past += N - 1;
             N = 1;
         }
+#endif // LLAMA_SKIP_UNUSED_LOGITS
 
         struct ggml_tensor * tmpq = ggml_mul_mat(ctx0, model.layers[il].wq, cur);
         offload_func_kq(tmpq);
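
For context on what the now-gated block buys: at the last layer, when the caller did not request logits for every token (!lctx.logits_all), the graph narrows to the final token, so downstream tensors shrink by a factor of N. A back-of-the-envelope sketch with assumed sizes (n_vocab = 32000, a 512-token batch, fp32 logits; the figures are illustrative):

    #include <cstdio>

    int main() {
        const long n_vocab = 32000;  // LLaMA vocabulary size (assumed)
        long n_past = 0;             // tokens already in the KV cache
        long N      = 512;           // tokens in the current batch

        // Without the skip, the final matmul emits logits for every batch token:
        std::printf("all tokens: %ld floats (%.1f MiB)\n",
                    n_vocab * N, n_vocab * N * 4.0 / (1024 * 1024));

        // With LLAMA_SKIP_UNUSED_LOGITS, the graph narrows to the last token,
        // mirroring the gated block above:
        n_past += N - 1;
        N = 1;
        std::printf("last token: %ld floats (%.1f KiB), n_past = %ld\n",
                    n_vocab * N, n_vocab * N * 4.0 / 1024, n_past);
    }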
@@ -1920,9 +1928,14 @@ static bool llama_eval_internal(
             memcpy(logits_out.data(), (float *) ggml_get_data(res), sizeof(float)*n_vocab*N);
         } else {
             // return result for just the last token
-            GGML_ASSERT(ggml_nelements(res) == n_vocab);
             logits_out.resize(n_vocab);
+#ifdef LLAMA_SKIP_UNUSED_LOGITS
+            GGML_ASSERT(ggml_nelements(res) == n_vocab);
             memcpy(logits_out.data(), (float *) ggml_get_data(res), sizeof(float)*n_vocab);
+#else
+            GGML_ASSERT(ggml_nelements(res) == n_vocab * N);
+            memcpy(logits_out.data(), (float *) ggml_get_data(res) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
+#endif
         }
     }
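The #else branch covers the non-skipping layout: res then holds logits for all N tokens, row-major as [N, n_vocab], so the last token's row starts at offset n_vocab*(N-1). A minimal sketch of that indexing with toy stand-in data (not the llama.cpp API):

    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main() {
        const int n_vocab = 4;  // toy vocabulary size
        const int N = 3;        // toy batch size
        std::vector<float> res(n_vocab * N);
        for (int i = 0; i < n_vocab * N; ++i) res[i] = (float) i;

        // Same arithmetic as the #else branch above: copy the last token's row.
        std::vector<float> logits_out(n_vocab);
        std::memcpy(logits_out.data(), res.data() + n_vocab * (N - 1),
                    sizeof(float) * n_vocab);

        for (float v : logits_out) std::printf("%g ", v);  // prints: 8 9 10 11
        std::printf("\n");
    }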
