
Commit 35ce3f6

Merge branch 'master' into Nexes_CQ_10
2 parents 6480054 + 8c475b9 commit 35ce3f6

File tree: 16 files changed (+78, -22 lines)


.github/workflows/build.yml

Lines changed: 5 additions & 0 deletions

@@ -19,6 +19,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true

+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  contents: write # for creating release
+
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   GGML_NLOOP: 3

.github/workflows/close-issue.yml

Lines changed: 5 additions & 0 deletions

@@ -3,6 +3,11 @@ on:
   schedule:
     - cron: "42 0 * * *"

+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  issues: write
+
 jobs:
   close-issues:
     runs-on: ubuntu-latest

.github/workflows/nix-ci-aarch64.yml

Lines changed: 7 additions & 0 deletions

@@ -21,6 +21,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true

+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-build-aarch64:
     runs-on: ubuntu-latest

.github/workflows/nix-ci.yml

Lines changed: 7 additions & 0 deletions

@@ -12,6 +12,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true

+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-eval:
     strategy:

README.md

Lines changed: 1 addition & 0 deletions

@@ -169,6 +169,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [AIKit](https://github.com/sozercan/aikit) (MIT)
 - [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
 - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)
+- [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL)

 *(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

ci/run.sh

Lines changed: 4 additions & 3 deletions

@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
 #
 # sample usage:
 #
@@ -751,7 +751,8 @@ function gg_run_rerank_tiny {

     model_f16="${path_models}/ggml-model-f16.gguf"

-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s><s>hi\nwhat is panda?</s><s>it's a bear\nwhat is panda?</s><s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    # for this model, the SEP token is "</s>"
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log

     # sample output
     # rerank score 0: 0.029
@@ -774,7 +775,7 @@

     check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
     check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
-    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.15" | tee -a $OUT/${ci}-rk-f16.log
+    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log

     set +e
 }

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -911,7 +911,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
     ).set_sparam());
     add_opt(llama_arg(
         {"-s", "--seed"}, "SEED",
-        format("RNG seed (default: %u, use random seed for %u)", params.sparams.seed, LLAMA_DEFAULT_SEED),
+        format("RNG seed (default: %d, use random seed for %d)", params.sparams.seed, LLAMA_DEFAULT_SEED),
         [](gpt_params & params, const std::string & value) {
             params.sparams.seed = std::stoul(value);
         }
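
Note: LLAMA_DEFAULT_SEED is the all-ones 32-bit value, so switching the format specifier from %u to %d makes the help text print -1 instead of 4294967295 (see the server README change below). A minimal stand-alone sketch, assuming only that the default seed is UINT32_MAX:

    // sketch, not llama.cpp code: why %d renders the default seed as -1
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t seed = 0xFFFFFFFF;                        // assumption: matches LLAMA_DEFAULT_SEED
        printf("old help text: default %u\n", seed);             // prints 4294967295
        printf("new help text: default %d\n", (int32_t) seed);   // prints -1
        return 0;
    }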

common/common.cpp

Lines changed: 29 additions & 1 deletion

@@ -838,6 +838,31 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         return iparams;
     }

+    if (params.reranking) {
+        bool ok = true;
+
+        if (llama_token_bos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_sep(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (!ok) {
+            llama_free_model(model);
+
+            return iparams;
+        }
+    }
+
     auto cparams = llama_context_params_from_gpt_params(params);

     llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -855,6 +880,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         if (cvec.n_embd == -1) {
             llama_free(lctx);
             llama_free_model(model);
+
             return iparams;
         }

@@ -867,6 +893,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
+
             return iparams;
         }
     }
@@ -889,7 +916,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         llama_lora_adapters_apply(lctx, iparams.lora_adapters);
     }

-    if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
+    if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
         LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
         params.sparams.ignore_eos = false;
     }
@@ -930,6 +957,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {

     iparams.model = model;
     iparams.context = lctx;
+
     return iparams;
 }
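Note: when the new reranking check fails, llama_init_from_gpt_params() frees the model and returns an empty llama_init_result, so callers see it like any other load failure. A caller-side sketch, assuming only the fields visible in this diff (iparams.model, iparams.context):

    // sketch: detecting the reranking bail-out added above
    llama_init_result iparams = llama_init_from_gpt_params(params);
    if (iparams.model == nullptr || iparams.context == nullptr) {
        fprintf(stderr, "model init failed (with --reranking, check that the model has BOS/EOS/SEP tokens)\n");
        return 1;
    }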

examples/server/README.md

Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ The project is under active development, and we are [looking for feedback and co
 | Argument | Explanation |
 | -------- | ----------- |
 | `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;tfs_z;typ_p;top_p;min_p;temperature) |
-| `-s, --seed SEED` | RNG seed (default: 4294967295, use random seed for 4294967295) |
+| `-s, --seed SEED` | RNG seed (default: -1, use random seed for -1) |
 | `--sampling-seq SEQUENCE` | simplified sequence for samplers that will be used (default: kfypmt) |
 | `--ignore-eos` | ignore end of stream token and continue generating (implies --logit-bias EOS-inf) |
 | `--penalize-nl` | penalize newline tokens (default: false) |

examples/server/server.cpp

Lines changed: 2 additions & 2 deletions

@@ -2027,15 +2027,15 @@ struct server_context {
                         continue;
                     }

-                    // prompt: <s>query</s><s>doc</s>
+                    // prompt: [BOS]query[EOS][SEP]doc[EOS]
                     prompt_tokens.clear();
                     prompt_tokens.push_back(llama_token_bos(model));
                     {
                         const auto part = tokenize(slot.prompt[0], false);
                         prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
                     }
                     prompt_tokens.push_back(llama_token_eos(model));
-                    prompt_tokens.push_back(llama_token_bos(model));
+                    prompt_tokens.push_back(llama_token_sep(model));
                     {
                         const auto part = tokenize(slot.prompt[1], false);
                         prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
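
Note: the rerank prompt is now framed with the model's own special tokens instead of a hard-coded second BOS. The same layout as a stand-alone helper; build_rerank_prompt is a hypothetical name (not part of server.cpp), and only the llama_token_bos/eos/sep accessors from llama.h are assumed:

    #include <vector>
    #include "llama.h"

    // hypothetical helper: builds [BOS]query[EOS][SEP]doc[EOS] from pre-tokenized parts
    static std::vector<llama_token> build_rerank_prompt(const llama_model * model,
                                                        const std::vector<llama_token> & query,
                                                        const std::vector<llama_token> & doc) {
        std::vector<llama_token> out;
        out.push_back(llama_token_bos(model));
        out.insert(out.end(), query.begin(), query.end());
        out.push_back(llama_token_eos(model));
        out.push_back(llama_token_sep(model));
        out.insert(out.end(), doc.begin(), doc.end());
        out.push_back(llama_token_eos(model));
        return out;
    }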

ggml/include/ggml-alloc.h

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, st
 // Graph allocator
 /*
   Example usage:
-    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type());
+    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());

     // optional: create a worst-case graph and reserve the buffers to avoid reallocations
     ggml_gallocr_reserve(galloc, build_graph(max_batch));

ggml/src/ggml-cuda.cu

Lines changed: 2 additions & 0 deletions

@@ -2448,6 +2448,7 @@ static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
     GGML_UNUSED(backend);
 }

+#ifdef USE_CUDA_GRAPH
 static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
     graph_node_properties->node_address = node->data;
     graph_node_properties->node_op = node->op;
@@ -2491,6 +2492,7 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
     }
     return true;
 }
+#endif

 static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;

ggml/src/ggml-metal.m

Lines changed: 2 additions & 2 deletions

@@ -3258,7 +3258,7 @@ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t s
 }

 static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = malloc(sizeof(struct ggml_backend_metal_buffer_context));
+    struct ggml_backend_metal_buffer_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_buffer_context));

     const size_t size_page = sysconf(_SC_PAGESIZE);

@@ -3340,7 +3340,7 @@ ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
 // buffer from ptr

 ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size) {
-    struct ggml_backend_metal_buffer_context * ctx = malloc(sizeof(struct ggml_backend_metal_buffer_context));
+    struct ggml_backend_metal_buffer_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_buffer_context));

     ctx->all_data = data;
     ctx->all_size = size;
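
Note: the malloc-to-calloc change zero-initializes the buffer context, so early-exit and cleanup paths see null pointers and zero sizes rather than indeterminate values. A generic illustration (not the Metal backend itself):

    #include <cstdlib>

    struct buffer_ctx {
        void * all_data;
        size_t all_size;
    };

    int main() {
        // calloc(1, size) zeroes every member; malloc() would leave them indeterminate
        buffer_ctx * ctx = (buffer_ctx *) calloc(1, sizeof(buffer_ctx));
        if (ctx != nullptr && ctx->all_data == nullptr && ctx->all_size == 0) {
            // fields are safe to inspect or free even if later initialization fails
        }
        free(ctx);
        return 0;
    }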

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-e5c233e5edbfcfa1d808b9293de9065035c40751
+0d7ecbbe536dc84240f646e0ec0a712251377f34

src/llama-vocab.h

Lines changed: 9 additions & 9 deletions

@@ -40,17 +40,17 @@ struct llama_vocab {
     id special_bos_id = 1;
     id special_eos_id = 2;
     id special_unk_id = 0;
-    id special_sep_id = -1;
-    id special_pad_id = -1;
-    id special_cls_id = -1;
-    id special_mask_id = -1;
+    id special_sep_id = LLAMA_TOKEN_NULL;
+    id special_pad_id = LLAMA_TOKEN_NULL;
+    id special_cls_id = LLAMA_TOKEN_NULL;
+    id special_mask_id = LLAMA_TOKEN_NULL;

     id linefeed_id = 13;
-    id special_prefix_id = -1;
-    id special_suffix_id = -1;
-    id special_middle_id = -1;
-    id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
-    id special_eom_id = -1;
+    id special_prefix_id = LLAMA_TOKEN_NULL;
+    id special_suffix_id = LLAMA_TOKEN_NULL;
+    id special_middle_id = LLAMA_TOKEN_NULL;
+    id special_eot_id = LLAMA_TOKEN_NULL; // TODO: move above after "eos_id", and here add "file separator" token
+    id special_eom_id = LLAMA_TOKEN_NULL;

     // set of all tokens that cause "end of generation"
     std::set<id> special_eog_ids;
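
Note: replacing the magic value -1 with the named LLAMA_TOKEN_NULL sentinel makes call sites self-describing. A small usage sketch, assuming LLAMA_TOKEN_NULL from llama.h as used elsewhere in this commit:

    // sketch: probe an optional special token via the named sentinel instead of -1
    const llama_token sep_id = llama_token_sep(model);
    if (sep_id == LLAMA_TOKEN_NULL) {
        // the model defines no SEP token; features that need one (e.g. reranking) should be disabled
    }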

src/llama.cpp

Lines changed: 1 addition & 1 deletion

@@ -2412,7 +2412,7 @@ struct llama_hparams {

     // needed by encoder-decoder models (e.g. T5, FLAN-T5)
     // ref: https://github.com/ggerganov/llama.cpp/pull/8141
-    llama_token dec_start_token_id = -1;
+    llama_token dec_start_token_id = LLAMA_TOKEN_NULL;

     enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
     enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
