@@ -17,27 +17,27 @@ enum class llama_sampler_type : char {
17
17
18
18
// sampling parameters
19
19
typedef struct gpt_sampling_params {
20
- uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
21
- int32_t n_prev = 64 ; // number of previous tokens to remember
22
- int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
23
- int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
24
- int32_t top_k = 40 ; // <= 0 to use vocab size
25
- float top_p = 0 .95f ; // 1.0 = disabled
26
- float min_p = 0 .05f ; // 0.0 = disabled
27
- float tfs_z = 1 .00f ; // 1.0 = disabled
28
- float typical_p = 1 .00f ; // 1.0 = disabled
29
- float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
30
- float dynatemp_range = 0 .00f ; // 0.0 = disabled
31
- float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
32
- int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
33
- float penalty_repeat = 1 .00f ; // 1.0 = disabled
34
- float penalty_freq = 0 .00f ; // 0.0 = disabled
35
- float penalty_present = 0 .00f ; // 0.0 = disabled
36
- int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
37
- float mirostat_tau = 5 .00f ; // target entropy
38
- float mirostat_eta = 0 .10f ; // learning rate
39
- bool penalize_nl = false ; // consider newlines as a repeatable token
40
- bool ignore_eos = false ;
20
+ uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
21
+ int32_t n_prev = 64 ; // number of previous tokens to remember
22
+ int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
23
+ int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
24
+ int32_t top_k = 40 ; // <= 0 to use vocab size
25
+ float top_p = 0 .95f ; // 1.0 = disabled
26
+ float min_p = 0 .05f ; // 0.0 = disabled
27
+ float tfs_z = 1 .00f ; // 1.0 = disabled
28
+ float typical_p = 1 .00f ; // 1.0 = disabled
29
+ float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
30
+ float dynatemp_range = 0 .00f ; // 0.0 = disabled
31
+ float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
32
+ int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
33
+ float penalty_repeat = 1 .00f ; // 1.0 = disabled
34
+ float penalty_freq = 0 .00f ; // 0.0 = disabled
35
+ float penalty_present = 0 .00f ; // 0.0 = disabled
36
+ int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
37
+ float mirostat_tau = 5 .00f ; // target entropy
38
+ float mirostat_eta = 0 .10f ; // learning rate
39
+ bool penalize_nl = false ; // consider newlines as a repeatable token
40
+ bool ignore_eos = false ;
41
41
42
42
std::vector<llama_sampler_type> samplers_sequence = {
43
43
llama_sampler_type::TOP_K,
@@ -68,8 +68,6 @@ struct llama_sampling_context {
68
68
69
69
std::vector<llama_token_data> cur;
70
70
std::vector<llama_token_data> org;
71
-
72
- size_t n_valid; // Number of correct top tokens with correct probabilities.
73
71
};
74
72
75
73
// Create a new sampling context instance.
0 commit comments