Commit 3c31292

try fix memory for perplexity
1 parent 424281a commit 3c31292

2 files changed, +6 -4 lines changed

llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -632,7 +632,7 @@ static bool llama_eval_internal(
     auto & mem_at_token1 = lctx.mem_at_token1;

     // TODO: fix this hardcoded size
-    static size_t buf_size = size_t(n_ctx)*1024*1024;
+    static size_t buf_size = size_t(n_ctx)*size_t(N)*128*1024;
     static void * buf = malloc(buf_size);

     const size_t C0 = mem_at_token0; // ~base
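The change scales the scratch buffer with the batch size N as well as n_ctx, instead of a flat n_ctx megabytes; the TODO notes it is still a heuristic. The surrounding names (mem_at_token0, mem_at_token1, and C0 as "~base"), together with the "2 evals ... base and growth" comment in main.cpp below, suggest a linear memory model. A minimal sketch of that idea, assuming the two calibration evals record used memory after processing n_tokens and then 2*n_tokens tokens; estimate_buf_size and its parameters are hypothetical names for illustration, not the repo's API:

#include <cstddef>

// Hypothetical helper: extrapolate buffer size from two measurements taken
// after evals of n_tokens and 2*n_tokens tokens (base + per-token growth).
static size_t estimate_buf_size(size_t mem_at_token0, size_t mem_at_token1,
                                size_t n_tokens, size_t n_total) {
    const size_t C0     = mem_at_token0;                              // ~base, per the diff comment
    const size_t growth = (mem_at_token1 - mem_at_token0) / n_tokens; // bytes per extra token
    return C0 + growth * n_total;                                     // linear extrapolation
}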

main.cpp

Lines changed: 5 additions & 3 deletions
@@ -220,12 +220,14 @@ int main(int argc, char ** argv) {
     // TODO: better way to do that
     // TODO(Green-Sky): move to internal and detect first time usage
     {
+        // perplexity uses context size as batch size (?)
+        const auto tmp_batch_size = params.perplexity ? params.n_ctx : params.n_batch;
         // we make 2 evals, of batchsize to take 2 measurements, to determine base and growth
-        std::vector<llama_token> tmp(params.n_batch*2, 2);
+        std::vector<llama_token> tmp(tmp_batch_size*2, 2);
         tmp[0] = llama_token_bos();

-        llama_eval(ctx, tmp.data(), params.n_batch, 0, params.n_threads);
-        llama_eval(ctx, tmp.data()+params.n_batch, params.n_batch, params.n_batch, params.n_threads);
+        llama_eval(ctx, tmp.data(), tmp_batch_size, 0, params.n_threads);
+        llama_eval(ctx, tmp.data()+tmp_batch_size, tmp_batch_size, tmp_batch_size, params.n_threads);
     }

     if (params.perplexity) {
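The calibration now matches the largest batch the run will actually submit: in perplexity mode the prompt is scored in n_ctx-token chunks, so llama_eval sees batches of n_ctx rather than n_batch. A rough illustration of why the ternary picks params.n_ctx, assuming chunked evaluation with a fresh window per chunk; 'tokens' stands for the tokenized evaluation text, and the loop is illustrative, not the repo's exact perplexity code:

// Illustrative only: score the text in chunks of n_ctx tokens, so each
// llama_eval call submits a batch of size n_ctx.
for (size_t start = 0; start + params.n_ctx <= tokens.size(); start += params.n_ctx) {
    // n_past = 0: every chunk starts a fresh context window
    llama_eval(ctx, tokens.data() + start, params.n_ctx, 0, params.n_threads);
    // ...accumulate log-probabilities from the returned logits here...
}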
