
Commit 95ce058

llama: propagating the results of graph_compute to the user interface
1 parent 23e0d70 commit 95ce058

File tree

1 file changed: +28, -4 lines


src/llama.cpp

Lines changed: 28 additions & 4 deletions
@@ -16533,7 +16533,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
     }
 }

-static void llama_graph_compute(
+static enum ggml_status llama_graph_compute(
           llama_context & lctx,
             ggml_cgraph * gf,
                     int   n_threads,
@@ -16555,9 +16555,11 @@ static void llama_graph_compute(
     }
 #endif

-    ggml_backend_sched_graph_compute_async(lctx.sched, gf);
+    auto status = ggml_backend_sched_graph_compute_async(lctx.sched, gf);

     // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
+
+    return status;
 }

 // decode a batch of tokens by evaluating the transformer
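The return type introduced here, enum ggml_status, is the status code that ggml_backend_sched_graph_compute_async already produces; the change simply forwards it instead of discarding it. For reference, a minimal sketch of the enum as declared in ggml.h (values taken from upstream ggml and shown only as an assumption for readability, not as part of this commit):

    // Status codes produced by ggml graph computation (sketch of the ggml.h declaration).
    // GGML_STATUS_ABORTED signals that an abort callback stopped the graph early.
    enum ggml_status {
        GGML_STATUS_ALLOC_FAILED = -2,
        GGML_STATUS_FAILED       = -1,
        GGML_STATUS_SUCCESS      =  0,
        GGML_STATUS_ABORTED      =  1,
    };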
@@ -16739,7 +16741,18 @@ static int llama_decode_internal(

         llama_set_inputs(lctx, ubatch);

-        llama_graph_compute(lctx, gf, n_threads, threadpool);
+        const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+        switch (compute_status) {
+            case GGML_STATUS_SUCCESS:
+                break;
+            case GGML_STATUS_ABORTED:
+                return 2;
+            case GGML_STATUS_ALLOC_FAILED:
+                return -2;
+            case GGML_STATUS_FAILED:
+            default:
+                return -3;
+        }

         // update the kv ring buffer
         {
@@ -16959,7 +16972,18 @@ static int llama_encode_internal(

     llama_set_inputs(lctx, ubatch);

-    llama_graph_compute(lctx, gf, n_threads, threadpool);
+    const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+    switch (compute_status) {
+        case GGML_STATUS_SUCCESS:
+            break;
+        case GGML_STATUS_ABORTED:
+            return 2;
+        case GGML_STATUS_ALLOC_FAILED:
+            return -2;
+        case GGML_STATUS_FAILED:
+        default:
+            return -3;
+    }

     // extract embeddings
     if (embd) {
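With the status propagated, llama_decode_internal and llama_encode_internal now return 2 when the compute was aborted, -2 when allocation failed, and -3 for any other compute failure, in addition to their existing return codes. A minimal caller-side sketch, assuming the public llama_decode() wrapper forwards these codes unchanged; the run_batch helper and its messages are illustrative, not part of the commit:

    #include "llama.h"

    #include <cstdio>

    // Hypothetical helper: interpret llama_decode() return codes after this change.
    static bool run_batch(llama_context * ctx, llama_batch batch) {
        const int32_t ret = llama_decode(ctx, batch);
        switch (ret) {
            case 0:
                return true;                                              // success
            case 2:
                fprintf(stderr, "decode aborted by callback\n");          // GGML_STATUS_ABORTED
                return false;
            case -2:
                fprintf(stderr, "compute allocation failed\n");           // GGML_STATUS_ALLOC_FAILED
                return false;
            case -3:
                fprintf(stderr, "graph compute failed\n");                // GGML_STATUS_FAILED
                return false;
            default:
                fprintf(stderr, "llama_decode returned %d\n", (int) ret); // e.g. 1 = no KV slot found
                return false;
        }
    }

Mapping the abort case to a positive value follows the existing convention in llama.h that positive codes are recoverable warnings while negative codes are hard errors.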
