@@ -26,9 +26,6 @@ class GptParams:
26
26
model : str = "./models/llama-7B/ggml-model.bin"
27
27
prompt : str = ""
28
28
input_prefix : str = " "
29
- fix_prefix : str = ""
30
- output_postfix : str = ""
31
- input_echo : bool = True ,
32
29
33
30
antiprompt : List [str ] = field (default_factory = list )
34
31
@@ -47,41 +44,57 @@ class GptParams:
47
44
mem_test : bool = False
48
45
verbose_prompt : bool = False
49
46
47
+ file : str = None
48
+
49
+ # If chat ended prematurely, append this to the conversation to fix it.
50
+ # Set to "\nUser:" etc.
51
+ # This is an alternative to input_prefix which always adds it, so it potentially duplicates "User:"
52
+ fix_prefix : str = " "
53
+ output_postfix : str = ""
54
+ input_echo : bool = True
55
+
50
56
# Default instructions for Alpaca
51
57
# switch to "Human" and "Assistant" for Vicuna.
52
- instruct_inp_prefix : str = "\n \n ### Instruction:\n \n " ,
53
- instruct_inp_suffix : str = "\n \n ### Response:\n \n " ,
58
+ # TODO: TBD how they are gonna handle this upstream
59
+ instruct_inp_prefix : str = "\n \n ### Instruction:\n \n "
60
+ instruct_inp_suffix : str = "\n \n ### Response:\n \n "
54
61
55
62
56
63
def gpt_params_parse (argv = None , params : Optional [GptParams ] = None ):
57
64
if params is None :
58
65
params = GptParams ()
59
66
60
- parser = argparse .ArgumentParser ()
61
- parser .add_argument ("-h" , "--help" , action = "store_true" , help = "show this help message and exit" )
62
- parser .add_argument ("-s" , "--seed" , type = int , default = - 1 , help = "" ,dest = "seed" )
63
- parser .add_argument ("-t" , "--threads" , type = int , default = 1 , help = "" ,dest = "n_threads" )
64
- parser .add_argument ("-p" , "--prompt" , type = str , default = "" , help = "" ,dest = "prompt" )
65
- parser .add_argument ("-f" , "--file" , type = str , default = None , help = "" )
66
- parser .add_argument ("-c" , "--ctx_size" , type = int , default = 512 , help = "" ,dest = "n_ctx" )
67
- parser .add_argument ("--memory_f32" , action = "store_false" , help = "" ,dest = "memory_f16" )
68
- parser .add_argument ("--top_p" , type = float , default = 0.9 , help = "" ,dest = "top_p" )
69
- parser .add_argument ("--temp" , type = float , default = 1.0 , help = "" ,dest = "temp" )
70
- parser .add_argument ("--repeat_last_n" , type = int , default = 64 , help = "" ,dest = "repeat_last_n" )
71
- parser .add_argument ("--repeat_penalty" , type = float , default = 1.0 , help = "" ,dest = "repeat_penalty" )
72
- parser .add_argument ("-b" , "--batch_size" , type = int , default = 8 , help = "" ,dest = "n_batch" )
73
- parser .add_argument ("--keep" , type = int , default = 0 , help = "" ,dest = "n_keep" )
74
- parser .add_argument ("-m" , "--model" , type = str , help = "" ,dest = "model" )
67
+ parser = argparse .ArgumentParser (formatter_class = argparse .ArgumentDefaultsHelpFormatter )
68
+ parser .add_argument ("-s" , "--seed" , type = int , default = - 1 , help = "RNG seed (use random seed for <= 0)" ,dest = "seed" )
69
+ parser .add_argument ("-t" , "--threads" , type = int , default = min (4 , os .cpu_count () or 1 ), help = "number of threads to use during computation" ,dest = "n_threads" )
70
+ parser .add_argument ("-p" , "--prompt" , type = str , default = "" , help = "initial prompt" ,dest = "prompt" )
71
+ parser .add_argument ("-f" , "--file" , type = str , default = None , help = "file containing initial prompt to load" ,dest = "file" )
72
+ parser .add_argument ("-c" , "--ctx_size" , type = int , default = 512 , help = "size of the prompt context" ,dest = "n_ctx" )
73
+ parser .add_argument ("--memory_f32" , action = "store_false" , help = "use f32 instead of f16 for memory key+value" ,dest = "memory_f16" )
74
+ parser .add_argument ("--top_p" , type = float , default = 0.95 , help = "top-p sampling" ,dest = "top_p" )
75
+ parser .add_argument ("--top_k" , type = int , default = 40 , help = "top-k sampling" ,dest = "top_k" )
76
+ parser .add_argument ("--temp" , type = float , default = 0.80 , help = "temperature" ,dest = "temp" )
77
+ parser .add_argument ("--n_predict" , type = int , default = 128 , help = "number of tokens to predict" ,dest = "n_predict" )
78
+ parser .add_argument ("--repeat_last_n" , type = int , default = 64 , help = "last n tokens to consider for penalize " ,dest = "repeat_last_n" )
79
+ parser .add_argument ("--repeat_penalty" , type = float , default = 1.10 , help = "penalize repeat sequence of tokens" ,dest = "repeat_penalty" )
80
+ parser .add_argument ("-b" , "--batch_size" , type = int , default = 8 , help = "batch size for prompt processing" ,dest = "n_batch" )
81
+ parser .add_argument ("--keep" , type = int , default = 0 , help = "number of tokens to keep from the initial prompt" ,dest = "n_keep" )
82
+ parser .add_argument ("-m" , "--model" , type = str , default = "./models/llama-7B/ggml-model.bin" , help = "model path" ,dest = "model" )
75
83
parser .add_argument (
76
84
"-i" , "--interactive" , action = "store_true" , help = "run in interactive mode" , dest = "interactive"
77
85
)
78
86
parser .add_argument ("--embedding" , action = "store_true" , help = "" , dest = "embedding" )
79
- parser .add_argument ("--interactive-start" , action = "store_true" , help = "" , dest = "interactive_start" )
87
+ parser .add_argument (
88
+ "--interactive-start" ,
89
+ action = "store_true" ,
90
+ help = "run in interactive mode" ,
91
+ dest = "interactive"
92
+ )
80
93
parser .add_argument (
81
94
"--interactive-first" ,
82
95
action = "store_true" ,
83
96
help = "run in interactive mode and wait for input right away" ,
84
- dest = "interactive "
97
+ dest = "interactive_start"
85
98
)
86
99
parser .add_argument (
87
100
"-ins" ,
@@ -96,24 +109,24 @@ def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
96
109
help = "colorise output to distinguish prompt and user input from generations" ,
97
110
dest = "use_color"
98
111
)
99
- parser .add_argument ("--mlock" , action = "store_true" ,dest = "use_mlock" )
100
- parser .add_argument ("--mtest" , action = "store_true" ,dest = "mem_test" )
112
+ parser .add_argument ("--mlock" , action = "store_true" ,help = "force system to keep model in RAM rather than swapping or compressing" , dest = "use_mlock" )
113
+ parser .add_argument ("--mtest" , action = "store_true" ,help = "compute maximum memory usage" , dest = "mem_test" )
101
114
parser .add_argument (
102
115
"-r" ,
103
116
"--reverse-prompt" ,
104
117
type = str ,
105
118
action = 'append' ,
106
- help = "run in interactive mode and poll user input upon seeing PROMPT (can be\n specified more than once for multiple prompts)." ,
119
+ help = "poll user input upon seeing PROMPT (can be\n specified more than once for multiple prompts)." ,
107
120
dest = "antiprompt"
108
121
)
109
- parser .add_argument ("--perplexity" , action = "store_true" , help = "" , dest = "perplexity" )
110
- parser .add_argument ("--ignore-eos" , action = "store_true" , help = "" , dest = "ignore_eos" )
111
- parser .add_argument ("--n_parts" , type = int , default = - 1 , help = "" , dest = "n_parts" )
112
- parser .add_argument ("--random-prompt" , action = "store_true" , help = "" , dest = "random_prompt" )
113
- parser .add_argument ("--in-prefix" , type = str , default = " " , help = "" , dest = "input_prefix" )
114
- parser .add_argument ("--fix-prefix" , type = str , default = " " , help = "" , dest = "fix_prefix" )
115
- parser .add_argument ("--out-postfix" , type = str , default = "" , help = "" , dest = "output_postfix" )
116
- parser .add_argument ("--input-noecho" , action = "store_false" , help = "" , dest = "input_echo" )
122
+ parser .add_argument ("--perplexity" , action = "store_true" , help = "compute perplexity over the prompt " , dest = "perplexity" )
123
+ parser .add_argument ("--ignore-eos" , action = "store_true" , help = "ignore end of stream token and continue generating " , dest = "ignore_eos" )
124
+ parser .add_argument ("--n_parts" , type = int , default = - 1 , help = "number of model parts " , dest = "n_parts" )
125
+ parser .add_argument ("--random-prompt" , action = "store_true" , help = "start with a randomized prompt. " , dest = "random_prompt" )
126
+ parser .add_argument ("--in-prefix" , type = str , default = "" , help = "string to prefix user inputs with " , dest = "input_prefix" )
127
+ parser .add_argument ("--fix-prefix" , type = str , default = "" , help = "append to input when generated n_predict tokens " , dest = "fix_prefix" )
128
+ parser .add_argument ("--out-postfix" , type = str , default = "" , help = "append to input " , dest = "output_postfix" )
129
+ parser .add_argument ("--input-noecho" , action = "store_false" , help = "dont output the input " , dest = "input_echo" )
117
130
args = parser .parse_args (argv )
118
131
return args
119
132
0 commit comments