File tree Expand file tree Collapse file tree 2 files changed +7
-6
lines changed Original file line number Diff line number Diff line change @@ -1268,7 +1268,7 @@ static bool llama_eval_internal(
1268
1268
const float * embd,
1269
1269
const int n_tokens,
1270
1270
const int n_past,
1271
- const int n_threads,
1271
+ int n_threads,
1272
1272
const char * cgraph_fname) {
1273
1273
1274
1274
LLAMA_ASSERT((!tokens && embd) || (tokens && !embd));
@@ -1309,10 +1309,11 @@ static bool llama_eval_internal(
1309
1309
1310
1310
struct ggml_context * ctx0 = ggml_init(params);
1311
1311
1312
+ ggml_cgraph gf = {};
1313
+
1312
1314
// for big prompts, if BLAS is enabled, it is better to use only one thread
1313
1315
// otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
1314
- ggml_cgraph gf = {};
1315
- const int actual_n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
1316
+ n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
1316
1317
1317
1318
struct ggml_tensor * cur;
1318
1319
struct ggml_tensor * inpL;
@@ -1622,7 +1623,7 @@ static bool llama_eval_internal(
1622
1623
#endif
1623
1624
1624
1625
if (call_ggml_graph_compute) {
1625
- ggml_cplan pf = ggml_graph_plan(&gf, actual_n_threads);
1626
+ ggml_cplan pf = ggml_graph_plan(&gf, n_threads);
1626
1627
if (pf.work_size > 0) {
1627
1628
lctx.work_buffer.resize(pf.work_size);
1628
1629
pf.work_data = lctx.work_buffer.data();
Original file line number Diff line number Diff line change @@ -10,5 +10,5 @@ llama_add_test(test-quantize-fns.cpp)
10
10
llama_add_test(test-quantize-perf.cpp)
11
11
llama_add_test(test-sampling.cpp)
12
12
llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
13
- llama_add_test(test-grad0.c) # SLOW
14
- llama_add_test(test-opt.c) # SLOW
13
+ # llama_add_test(test-grad0.c) # SLOW
14
+ # llama_add_test(test-opt.c) # SLOW
You can’t perform that action at this time.
0 commit comments