tests : disable slow grad / opt tests + reuse n_threads instead of actual_n_threads

ggerganov · ggerganov · commit 8e1f0b6865f1 · 2023-07-06T20:30:40.000+03:00
diff --git a/llama.cpp b/llama.cpp
@@ -1268,7 +1268,7 @@ static bool llama_eval_internal(
            const float * embd,
              const int   n_tokens,
              const int   n_past,
-             const int   n_threads,
+                   int   n_threads,
             const char * cgraph_fname) {
 
     LLAMA_ASSERT((!tokens && embd) || (tokens && !embd));
@@ -1309,10 +1309,11 @@ static bool llama_eval_internal(
 
     struct ggml_context * ctx0 = ggml_init(params);
 
+    ggml_cgraph gf = {};
+
     // for big prompts, if BLAS is enabled, it is better to use only one thread
     // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
-    ggml_cgraph gf = {};
-    const int actual_n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
+    n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
 
     struct ggml_tensor * cur;
     struct ggml_tensor * inpL;
@@ -1622,7 +1623,7 @@ static bool llama_eval_internal(
 #endif
 
     if (call_ggml_graph_compute) {
-        ggml_cplan pf = ggml_graph_plan(&gf, actual_n_threads);
+        ggml_cplan pf = ggml_graph_plan(&gf, n_threads);
         if (pf.work_size > 0) {
             lctx.work_buffer.resize(pf.work_size);
             pf.work_data = lctx.work_buffer.data();
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -10,5 +10,5 @@ llama_add_test(test-quantize-fns.cpp)
 llama_add_test(test-quantize-perf.cpp)
 llama_add_test(test-sampling.cpp)
 llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
-llama_add_test(test-grad0.c) # SLOW
-llama_add_test(test-opt.c) # SLOW
+# llama_add_test(test-grad0.c) # SLOW
+# llama_add_test(test-opt.c) # SLOW