fix build: remove the n_ctx local in parallel.cpp and the n_batch local in server.cpp

ngxson · ngxson · commit 42062cc2c710 · 2025-03-25T18:39:19.000+01:00
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
@@ -158,8 +158,6 @@ int main(int argc, char ** argv) {
 
     LOG_INF("\n\n");
 
-    const int n_ctx = llama_n_ctx(ctx);
-
     std::vector<client> clients(n_clients);
     for (size_t i = 0; i < clients.size(); ++i) {
         auto & client = clients[i];
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -1959,13 +1959,7 @@ struct server_context {
 
         // the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens
         // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
-        {
-            const int32_t n_batch = llama_n_batch(ctx);
-
-            // only a single seq_id per token is needed
-            batch.clear();
-        }
-
+        batch.clear();
         metrics.init();
     }