Skip to content

Commit ab0e26b

Browse files
authored
llama : remove cfg smooth factor as it is only a reparameterization of the guidance scale (#2280)
1 parent 73643f5 commit ab0e26b

File tree

5 files changed

+4
-24
lines changed

5 files changed

+4
-24
lines changed

examples/common.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
260260
break;
261261
}
262262
params.cfg_scale = std::stof(argv[i]);
263-
} else if (arg == "--cfg-smooth-factor") {
264-
if (++i >= argc) {
265-
invalid_param = true;
266-
break;
267-
}
268-
params.cfg_smooth_factor = std::stof(argv[i]);
269263
} else if (arg == "-b" || arg == "--batch-size") {
270264
if (++i >= argc) {
271265
invalid_param = true;
@@ -509,7 +503,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
509503
fprintf(stderr, " --cfg-negative-prompt PROMPT \n");
510504
fprintf(stderr, " negative prompt to use for guidance. (default: empty)\n");
511505
fprintf(stderr, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
512-
fprintf(stderr, " --cfg-smooth-factor N smooth factor between old and new logits (default: %f, 1.0 = no smoothing)\n", params.cfg_smooth_factor);
513506
fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
514507
fprintf(stderr, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
515508
fprintf(stderr, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);

examples/common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ struct gpt_params {
5555
// https://arxiv.org/abs/2306.17806
5656
std::string cfg_negative_prompt; // string to help guidance
5757
float cfg_scale = 1.f; // How strong is guidance
58-
float cfg_smooth_factor = 1.f; // Smooth factor between old and new logits
5958

6059
std::string model = "models/7B/ggml-model.bin"; // model path
6160
std::string model_alias = "unknown"; // model alias

examples/main/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ int main(int argc, char ** argv) {
557557
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
558558

559559
if (ctx_guidance) {
560-
llama_sample_classifier_free_guidance(ctx, &candidates_p, ctx_guidance, params.cfg_scale, params.cfg_smooth_factor);
560+
llama_sample_classifier_free_guidance(ctx, &candidates_p, ctx_guidance, params.cfg_scale);
561561
}
562562

563563
// Apply penalties

llama.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2218,8 +2218,7 @@ void llama_sample_classifier_free_guidance(
22182218
struct llama_context * ctx,
22192219
llama_token_data_array * candidates,
22202220
struct llama_context * guidance_ctx,
2221-
float scale,
2222-
float smooth_factor) {
2221+
float scale) {
22232222
int64_t t_start_sample_us = ggml_time_us();
22242223

22252224
assert(ctx);
@@ -2240,16 +2239,7 @@ void llama_sample_classifier_free_guidance(
22402239
for (int i = 0; i < n_vocab; ++i) {
22412240
float logit_guidance = logits_guidance[i];
22422241
float logit_base = logits_base[i];
2243-
logits_guidance[i] = scale * (logit_base - logit_guidance) + logit_guidance;
2244-
}
2245-
2246-
llama_log_softmax(logits_guidance, n_vocab);
2247-
2248-
for (int i = 0; i < n_vocab; ++i) {
2249-
float logit_base = logits_base[i];
2250-
float logit_guidance = logits_guidance[i];
2251-
2252-
candidates->data[i].logit = smooth_factor * logit_guidance + (1.f - smooth_factor) * logit_base;
2242+
candidates->data[i].logit = scale * (logit_base - logit_guidance) + logit_guidance;
22532243
}
22542244

22552245
if (ctx) {

llama.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -344,13 +344,11 @@ extern "C" {
344344
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.
345345
/// @param guidance_ctx A separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
346346
/// @param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
347-
/// @params smooth_factor Smooth factor between guidance logits and original logits. 1.0f means only use guidance logits. 0.0f means only original logits.
348347
LLAMA_API void llama_sample_classifier_free_guidance(
349348
struct llama_context * ctx,
350349
llama_token_data_array * candidates,
351350
struct llama_context * guidance_ctx,
352-
float scale,
353-
float smooth_factor);
351+
float scale);
354352

355353
/// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
356354
LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);

0 commit comments

Comments
 (0)