File tree 2 files changed +1
-9
lines changed
2 files changed +1
-9
lines changed Original file line number Diff line number Diff line change @@ -158,8 +158,6 @@ int main(int argc, char ** argv) {
158
158
159
159
LOG_INF (" \n\n " );
160
160
161
- const int n_ctx = llama_n_ctx (ctx);
162
-
163
161
std::vector<client> clients (n_clients);
164
162
for (size_t i = 0 ; i < clients.size (); ++i) {
165
163
auto & client = clients[i];
Original file line number Diff line number Diff line change @@ -1959,13 +1959,7 @@ struct server_context {
1959
1959
1960
1960
// the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens
1961
1961
// note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
1962
- {
1963
- const int32_t n_batch = llama_n_batch (ctx);
1964
-
1965
- // only a single seq_id per token is needed
1966
- batch.clear ();
1967
- }
1968
-
1962
+ batch.clear ();
1969
1963
metrics.init ();
1970
1964
}
1971
1965
You can’t perform that action at this time.
0 commit comments