Skip to content

Commit 51755b7

Browse files
author
Judd
committed
Cohere: use logit_scale just as in CohereForCausalLM.
1 parent 7f43063 commit 51755b7

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

models.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ namespace chatllm
269269
BaseModelForConditionalGeneration(ModelType model_type, BaseConfig config, size_t mem_size, size_t scratch_size)
270270
: BaseModel(model_type, to_string(model_type), to_native_string(model_type), get_model_purpose(model_type)),
271271
GRAPH_SIZE(GGML_DEFAULT_GRAPH_SIZE),
272-
batch_input(true),
272+
batch_input(true), logit_scale(-1.0f),
273273
config_(config), mem_size_(mem_size), mem_buffer_(new char[mem_size]),
274274
scratch_size_(scratch_size), scratch_buffer_(new char[scratch_size])
275275
{
@@ -485,6 +485,9 @@ namespace chatllm
485485

486486
ggml_tensor *r = transformer.forward(&ctx, input_ids_tensor, past);
487487

488+
if (logit_scale > 0)
489+
r = ggml_scale_inplace(ctx.gctx.get(), r, logit_scale);
490+
488491
ggml_build_forward_expand(ctx.gf, r);
489492
ggml_graph_compute_with_ctx(ctx.gctx.get(), ctx.gf, n_threads);
490493

@@ -557,6 +560,7 @@ namespace chatllm
557560
LM transformer;
558561
size_t GRAPH_SIZE;
559562
bool batch_input;
563+
float logit_scale;
560564
private:
561565
BaseConfig config_;
562566
size_t mem_size_;

models/cohere.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ class ConditionalGeneration : public BaseModelForConditionalGeneration<
9393
attention.freq_base = config.rope_theta;
9494
}
9595

96+
logit_scale = config.logit_scale;
97+
9698
GRAPH_SIZE = 4096;
9799
}
98100

0 commit comments

Comments (0)