Nexesenex
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
Lines changed: 2 additions & 4 deletions
diff --git a/examples/llava/requirements.txt b/examples/llava/requirements.txt
Lines changed: 1 addition & 1 deletion
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
Lines changed: 3 additions & 2 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
Lines changed: 1 addition & 1 deletion
@@ -6,15 +6,13 @@ on:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
   pull_request:
     paths:
       - '.github/workflows/python-check-requirements.yml'
       - 'scripts/check-requirements.sh'
       - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
+      - '**/requirements*.txt'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
 
@@ -2,4 +2,4 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 pillow~=10.2.0
 torch~=2.2.1
-torchvision==0.17.1
+torchvision~=0.17.1
@@ -631,6 +631,7 @@ struct server_context {
 
     bool clean_kv_cache = true;
     bool add_bos_token  = true;
+    bool has_eos_token  = false;
 
     int32_t n_ctx; // total context for all clients / slots
 
@@ -693,7 +694,7 @@ struct server_context {
         n_ctx = llama_n_ctx(ctx);
 
         add_bos_token = llama_should_add_bos_token(model);
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        has_eos_token = llama_add_eos_token(model) != 1;
 
         return true;
     }
@@ -1031,7 +1032,7 @@ struct server_context {
         {
             slot.sparams.logit_bias.clear();
 
-            if (json_value(data, "ignore_eos", false)) {
+            if (json_value(data, "ignore_eos", false) && has_eos_token) {
                 slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
             }
 
 
@@ -21129,7 +21129,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 (int64_t) info->ne[2] *
                 (int64_t) info->ne[3];
 
-            if (ne % ggml_blck_size(info->type) != 0) {
+            if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
                 fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
                         __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
                 fclose(file);
Original file line number	Diff line number	Diff line change
`@@ -631,6 +631,7 @@ struct server_context {`
`631`	`631`
`632`	`632`	`bool clean_kv_cache = true;`
`633`	`633`	`bool add_bos_token = true;`
	`634`	`+ bool has_eos_token = false;`
`634`	`635`
`635`	`636`	`int32_t n_ctx; // total context for all clients / slots`
`636`	`637`
`@@ -693,7 +694,7 @@ struct server_context {`
`693`	`694`	`n_ctx = llama_n_ctx(ctx);`
`694`	`695`
`695`	`696`	`add_bos_token = llama_should_add_bos_token(model);`
`696`		`- GGML_ASSERT(llama_add_eos_token(model) != 1);`
	`697`	`+ has_eos_token = llama_add_eos_token(model) != 1;`
`697`	`698`
`698`	`699`	`return true;`
`699`	`700`	`}`
`@@ -1031,7 +1032,7 @@ struct server_context {`
`1031`	`1032`	`{`
`1032`	`1033`	`slot.sparams.logit_bias.clear();`
`1033`	`1034`
`1034`		`- if (json_value(data, "ignore_eos", false)) {`
	`1035`	`+ if (json_value(data, "ignore_eos", false) && has_eos_token) {`
`1035`	`1036`	`slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;`
`1036`	`1037`	`}`
`1037`	`1038`