@@ -195,6 +195,7 @@ class EngineArgs:
195195 kv_transfer_config : Optional [KVTransferConfig ] = None
196196
197197 generation_config : Optional [str ] = None
198+ override_generation_config : Optional [Dict [str , Any ]] = None
198199 enable_sleep_mode : bool = False
199200
200201 calculate_kv_scales : Optional [bool ] = None
@@ -936,12 +937,23 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
936937 type = nullable_str ,
937938 default = None ,
938939 help = "The folder path to the generation config. "
939- "Defaults to None, will use the default generation config in vLLM. "
940- "If set to 'auto', the generation config will be automatically "
941- "loaded from model. If set to a folder path, the generation config "
942- "will be loaded from the specified folder path. If "
943- "`max_new_tokens` is specified, then it sets a server-wide limit "
944- "on the number of output tokens for all requests." )
940+ "Defaults to None, no generation config is loaded, vLLM defaults "
941+ "will be used. If set to 'auto', the generation config will be "
942+ "loaded from model path. If set to a folder path, the generation "
943+ "config will be loaded from the specified folder path. If "
944+ "`max_new_tokens` is specified in generation config, then "
945+ "it sets a server-wide limit on the number of output tokens "
946+ "for all requests." )
947+
948+ parser .add_argument (
949+ "--override-generation-config" ,
950+ type = json .loads ,
951+ default = None ,
952+ help = "Overrides or sets generation config in JSON format. "
953+ "e.g. ``{\" temperature\" : 0.5}``. If used with "
954+ "--generation-config=auto, the override parameters will be merged "
955+ "with the default config from the model. If generation-config is "
956+ "None, only the override parameters are used." )
945957
946958 parser .add_argument ("--enable-sleep-mode" ,
947959 action = "store_true" ,
@@ -1002,6 +1014,7 @@ def create_model_config(self) -> ModelConfig:
10021014 override_pooler_config = self .override_pooler_config ,
10031015 logits_processor_pattern = self .logits_processor_pattern ,
10041016 generation_config = self .generation_config ,
1017+ override_generation_config = self .override_generation_config ,
10051018 enable_sleep_mode = self .enable_sleep_mode ,
10061019 )
10071020
0 commit comments