@@ -171,56 +171,43 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
171
171
params.penalize_nl ,
172
172
params.ignore_eos ));
173
173
174
- if (params.temp >= 0 .0f ) {
175
- if (params.mirostat == 0 ) {
176
- for (const auto & cnstr : params.samplers ) {
177
- switch (cnstr) {
178
- case COMMON_SAMPLER_TYPE_TOP_K:
179
- llama_sampler_chain_add (result->chain , llama_sampler_init_top_k (params.top_k ));
180
- break ;
181
- case COMMON_SAMPLER_TYPE_TOP_P:
182
- llama_sampler_chain_add (result->chain , llama_sampler_init_top_p (params.top_p , params.min_keep ));
183
- break ;
184
- case COMMON_SAMPLER_TYPE_MIN_P:
185
- llama_sampler_chain_add (result->chain , llama_sampler_init_min_p (params.min_p , params.min_keep ));
186
- break ;
187
- case COMMON_SAMPLER_TYPE_XTC:
188
- llama_sampler_chain_add (result->chain , llama_sampler_init_xtc (params.xtc_probability , params.xtc_threshold , params.min_keep , params.seed ));
189
- break ;
190
- case COMMON_SAMPLER_TYPE_TFS_Z:
191
- llama_sampler_chain_add (result->chain , llama_sampler_init_tail_free (params.tfs_z , params.min_keep ));
192
- break ;
193
- case COMMON_SAMPLER_TYPE_TYPICAL_P:
194
- llama_sampler_chain_add (result->chain , llama_sampler_init_typical (params.typ_p , params.min_keep ));
195
- break ;
196
- case COMMON_SAMPLER_TYPE_TEMPERATURE:
197
- llama_sampler_chain_add (result->chain , llama_sampler_init_temp_ext (params.temp , params.dynatemp_range , params.dynatemp_exponent ));
198
- break ;
199
- default :
200
- GGML_ASSERT (false && " unknown sampler type" );
201
- }
174
+ if (params.mirostat == 0 ) {
175
+ for (const auto & cnstr : params.samplers ) {
176
+ switch (cnstr) {
177
+ case COMMON_SAMPLER_TYPE_TOP_K:
178
+ llama_sampler_chain_add (result->chain , llama_sampler_init_top_k (params.top_k ));
179
+ break ;
180
+ case COMMON_SAMPLER_TYPE_TOP_P:
181
+ llama_sampler_chain_add (result->chain , llama_sampler_init_top_p (params.top_p , params.min_keep ));
182
+ break ;
183
+ case COMMON_SAMPLER_TYPE_MIN_P:
184
+ llama_sampler_chain_add (result->chain , llama_sampler_init_min_p (params.min_p , params.min_keep ));
185
+ break ;
186
+ case COMMON_SAMPLER_TYPE_XTC:
187
+ llama_sampler_chain_add (result->chain , llama_sampler_init_xtc (params.xtc_probability , params.xtc_threshold , params.min_keep , params.seed ));
188
+ break ;
189
+ case COMMON_SAMPLER_TYPE_TFS_Z:
190
+ llama_sampler_chain_add (result->chain , llama_sampler_init_tail_free (params.tfs_z , params.min_keep ));
191
+ break ;
192
+ case COMMON_SAMPLER_TYPE_TYPICAL_P:
193
+ llama_sampler_chain_add (result->chain , llama_sampler_init_typical (params.typ_p , params.min_keep ));
194
+ break ;
195
+ case COMMON_SAMPLER_TYPE_TEMPERATURE:
196
+ llama_sampler_chain_add (result->chain , llama_sampler_init_temp_ext (params.temp , params.dynatemp_range , params.dynatemp_exponent ));
197
+ break ;
198
+ default :
199
+ GGML_ASSERT (false && " unknown sampler type" );
202
200
}
203
- llama_sampler_chain_add (result->chain , llama_sampler_init_dist (params.seed ));
204
- } else if (params.mirostat == 1 ) {
205
- llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
206
- llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat (llama_n_vocab (model), params.seed , params.mirostat_tau , params.mirostat_eta , 100 ));
207
- } else if (params.mirostat == 2 ) {
208
- llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
209
- llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat_v2 (params.seed , params.mirostat_tau , params.mirostat_eta ));
210
- } else {
211
- GGML_ASSERT (false && " unknown mirostat version" );
212
201
}
202
+ llama_sampler_chain_add (result->chain , llama_sampler_init_dist (params.seed ));
203
+ } else if (params.mirostat == 1 ) {
204
+ llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
205
+ llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat (llama_n_vocab (model), params.seed , params.mirostat_tau , params.mirostat_eta , 100 ));
206
+ } else if (params.mirostat == 2 ) {
207
+ llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
208
+ llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat_v2 (params.seed , params.mirostat_tau , params.mirostat_eta ));
213
209
} else {
214
- // negative temperatures will trigger "greedy" sampling: simply take the most likely token each time
215
- if (params.n_probs > 0 ) {
216
- // some use cases require to sample greedily, but still obtain the probabilities of the top tokens
217
- // ref: https://github.com/ggerganov/llama.cpp/pull/9605
218
- //
219
- // the following will not produce exactly the same probs as applying softmax to the full vocabulary, but
220
- // it is much faster, since we avoid sorting all tokens and should give a good approximation
221
- llama_sampler_chain_add (result->chain , llama_sampler_init_top_k (params.n_probs ));
222
- }
223
- llama_sampler_chain_add (result->chain , llama_sampler_init_greedy ());
210
+ GGML_ASSERT (false && " unknown mirostat version" );
224
211
}
225
212
226
213
return result;
0 commit comments