Skip to content

Commit d5e9575

Browse files
Windows test
1 parent b942fe3 commit d5e9575

File tree

1 file changed

+18
-8
lines changed

1 file changed

+18
-8
lines changed

llama.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,37 +1480,38 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
14801480
//
14811481

14821482
void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates) {
1483+
printf("llama_sample_softmax\n"); fflush(stdout);
14831484
assert(candidates->size > 0);
1484-
printf("llama_sample_softmax\n");
14851485

14861486
const int64_t t_start_sample_us = ggml_time_us();
14871487

1488+
printf("llama_sample_softmax 1\n"); fflush(stdout);
14881489
// Sort the logits in descending order
14891490
if (!candidates->sorted) {
14901491
std::sort(candidates->data, candidates->data + candidates->size, [](const llama_token_data & a, const llama_token_data & b) {
14911492
return a.logit > b.logit;
14921493
});
14931494
candidates->sorted = true;
14941495
}
1495-
printf("llama_sample_softmax 2\n");
1496+
printf("llama_sample_softmax 2\n"); fflush(stdout);
14961497

14971498
float max_l = candidates->data[0].logit;
1498-
printf("max_l = %f\n", max_l);
1499-
fflush(stdout);
1499+
printf("max_l = %f\n", max_l); fflush(stdout);
15001500
float cum_sum = 0.0f;
15011501
for (size_t i = 0; i < candidates->size; ++i) {
1502-
printf("i = %d, logit = %f\n", i, candidates->data[i].logit);
1503-
fflush(stdout);
1502+
printf("i = %d, logit = %f\n", i, candidates->data[i].logit); fflush(stdout);
15041503
float p = expf(candidates->data[i].logit - max_l);
1504+
printf(" p = %f\n", p); fflush(stdout);
15051505
candidates->data[i].p = p;
15061506
cum_sum += p;
1507+
printf(" cum_sum = %f\n", cum_sum); fflush(stdout);
15071508
}
15081509
printf("cum_sum = %f\n", cum_sum);
15091510
fflush(stdout);
15101511
for (size_t i = 0; i < candidates->size; ++i) {
1511-
printf("i = %d, p = %f\n", i, candidates->data[i].logit);
1512-
fflush(stdout);
1512+
printf("i = %d, p = %f\n", i, candidates->data[i].p); fflush(stdout);
15131513
candidates->data[i].p /= cum_sum;
1514+
printf(" p = %f\n", candidates->data[i].p); fflush(stdout);
15141515
}
15151516

15161517
if (ctx) {
@@ -1521,26 +1522,35 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c
15211522
void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep) {
15221523
const int64_t t_start_sample_us = ggml_time_us();
15231524

1525+
printf("llama_sample_top_k\n"); fflush(stdout);
15241526
k = std::max(k, (int) min_keep);
15251527
k = std::min(k, (int) candidates->size);
1528+
printf("llama_sample_top_k 2\n"); fflush(stdout);
15261529

15271530
// Sort scores in descending order
15281531
if (!candidates->sorted) {
1532+
printf("llama_sample_top_k 3\n"); fflush(stdout);
15291533
auto comp = [](const llama_token_data & a, const llama_token_data & b) {
1534+
printf("llama_sample_top_k 4\n"); fflush(stdout);
15301535
return a.logit > b.logit;
15311536
};
15321537
if (k == (int) candidates->size) {
1538+
printf("llama_sample_top_k 5\n"); fflush(stdout);
15331539
std::sort(candidates->data, candidates->data + candidates->size, comp);
15341540
} else {
1541+
printf("llama_sample_top_k 6\n"); fflush(stdout);
15351542
std::partial_sort(candidates->data, candidates->data + k, candidates->data + candidates->size, comp);
15361543
}
1544+
printf("llama_sample_top_k 7\n"); fflush(stdout);
15371545
candidates->sorted = true;
15381546
}
15391547
candidates->size = k;
1548+
printf("llama_sample_top_k 8\n"); fflush(stdout);
15401549

15411550
if (ctx) {
15421551
ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
15431552
}
1553+
printf("llama_sample_top_k 9\n"); fflush(stdout);
15441554
}
15451555

15461556
void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) {

0 commit comments

Comments
 (0)