From 2c11df8f914064958c1a887960476c02cbd769f9 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 24 Jan 2025 22:32:40 +0800 Subject: [PATCH 1/4] support override gen config Signed-off-by: liuyanyi --- vllm/config.py | 13 +++++++++++-- vllm/engine/arg_utils.py | 9 +++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index efd81ad3de3b..d742a39f350a 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -164,6 +164,8 @@ class ModelConfig: `logits_processors` extra completion argument. Defaults to None, which allows no processors. generation_config: Configuration parameter file for generation. + override_generation_config: Override the generation config with the + given config. """ def compute_hash(self) -> str: @@ -224,6 +226,7 @@ def __init__( logits_processor_pattern: Optional[str] = None, generation_config: Optional[str] = None, enable_sleep_mode: bool = False, + override_generation_config: Optional[Dict[str, Any]] = None, ) -> None: self.model = model self.tokenizer = tokenizer @@ -366,6 +369,7 @@ def __init__( self.logits_processor_pattern = logits_processor_pattern self.generation_config = generation_config + self.override_generation_config = override_generation_config or {} self._verify_quantization() self._verify_cuda_graph() @@ -902,8 +906,13 @@ def get_diff_sampling_param(self) -> Dict[str, Any]: """ if self.generation_config is None: # When generation_config is not set - return {} - config = self.try_get_generation_config() + config = {} + else: + config = self.try_get_generation_config() + + # Overriding with given generation config + config.update(self.override_generation_config) + available_params = [ "repetition_penalty", "temperature", diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8f1b0bc5fd62..d6e8dd59269c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -196,6 +196,7 @@ class EngineArgs: kv_transfer_config: Optional[KVTransferConfig] = None generation_config: Optional[str] = None + override_generation_config: Optional[Dict[str, Any]] = None enable_sleep_mode: bool = False calculate_kv_scales: Optional[bool] = None @@ -945,6 +946,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: "If set to 'auto', the generation config will be automatically " "loaded from model. If set to a folder path, the generation config " "will be loaded from the specified folder path.") + parser.add_argument( + "--override-generation-config", + type=json.loads, + default=None, + help="Override or set generation config. " + "Defaults to None, will use for the default generation config. " + "e.g. ``{\"temperature\": 0.5, \"top_k\": 50}``.") parser.add_argument("--enable-sleep-mode", action="store_true", @@ -1005,6 +1013,7 @@ def create_model_config(self) -> ModelConfig: override_pooler_config=self.override_pooler_config, logits_processor_pattern=self.logits_processor_pattern, generation_config=self.generation_config, + override_generation_config=self.override_generation_config, enable_sleep_mode=self.enable_sleep_mode, ) From 9d3f2777612a7bfb5c2c115eda5969d53ea82ee9 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 24 Jan 2025 22:50:50 +0800 Subject: [PATCH 2/4] add tests Signed-off-by: liuyanyi --- tests/test_config.py | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/test_config.py b/tests/test_config.py index 4518adfc31bf..fb63fa0300fd 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -281,3 +281,58 @@ def test_uses_mrope(model_id, uses_mrope): ) assert config.uses_mrope == uses_mrope + + +def test_generation_config_loading(): + model_id = "/large-storage/model/Qwen2.5/qwen/Qwen2___5-1___5B-Instruct/" + + # When set generation_config to None, the default generation config + # will not be loaded. + model_config = ModelConfig(model_id, + task="auto", + tokenizer=model_id, + tokenizer_mode="auto", + trust_remote_code=False, + seed=0, + dtype="float16", + generation_config=None) + assert model_config.get_diff_sampling_param() == {} + + # When set generation_config to "auto", the default generation config + # should be loaded. + model_config = ModelConfig(model_id, + task="auto", + tokenizer=model_id, + tokenizer_mode="auto", + trust_remote_code=False, + seed=0, + dtype="float16", + generation_config="auto") + + correct_generation_config = { + "repetition_penalty": 1.1, + "temperature": 0.7, + "top_p": 0.8, + "top_k": 20, + } + + assert model_config.get_diff_sampling_param() == correct_generation_config + + # The generation config could be overridden by the user. + override_generation_config = {"temperature": 0.5, "top_k": 5} + + model_config = ModelConfig( + model_id, + task="auto", + tokenizer=model_id, + tokenizer_mode="auto", + trust_remote_code=False, + seed=0, + dtype="float16", + generation_config="auto", + override_generation_config=override_generation_config) + + override_result = correct_generation_config.copy() + override_result.update(override_generation_config) + + assert model_config.get_diff_sampling_param() == override_result From c662d0e0f0e8576077244c6e78112b07edb41ab9 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 24 Jan 2025 22:59:13 +0800 Subject: [PATCH 3/4] fix local path Signed-off-by: liuyanyi --- tests/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_config.py b/tests/test_config.py index fb63fa0300fd..c7126a4ca8fa 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -284,7 +284,7 @@ def test_uses_mrope(model_id, uses_mrope): def test_generation_config_loading(): - model_id = "/large-storage/model/Qwen2.5/qwen/Qwen2___5-1___5B-Instruct/" + model_id = "Qwen/Qwen2.5-1.5B-Instruct" # When set generation_config to None, the default generation config # will not be loaded. From bc8697c818669e11e5e3192547ffe51654c64a6f Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Sat, 25 Jan 2025 14:51:32 +0800 Subject: [PATCH 4/4] rewrite help Signed-off-by: liuyanyi --- tests/test_config.py | 15 +++++++++++++++ vllm/engine/arg_utils.py | 17 ++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index c7126a4ca8fa..ec366b93d6a3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -336,3 +336,18 @@ def test_generation_config_loading(): override_result.update(override_generation_config) assert model_config.get_diff_sampling_param() == override_result + + # When generation_config is set to None and override_generation_config + # is set, the override_generation_config should be used directly. + model_config = ModelConfig( + model_id, + task="auto", + tokenizer=model_id, + tokenizer_mode="auto", + trust_remote_code=False, + seed=0, + dtype="float16", + generation_config=None, + override_generation_config=override_generation_config) + + assert model_config.get_diff_sampling_param() == override_generation_config diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index d6e8dd59269c..58814dd087c7 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -942,17 +942,20 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: type=nullable_str, default=None, help="The folder path to the generation config. " - "Defaults to None, will use the default generation config in vLLM. " - "If set to 'auto', the generation config will be automatically " - "loaded from model. If set to a folder path, the generation config " - "will be loaded from the specified folder path.") + "Defaults to None, no generation config is loaded, vLLM defaults " + "will be used. If set to 'auto', the generation config will be " + "loaded from model path. If set to a folder path, the generation " + "config will be loaded from the specified folder path.") + parser.add_argument( "--override-generation-config", type=json.loads, default=None, - help="Override or set generation config. " - "Defaults to None, will use for the default generation config. " - "e.g. ``{\"temperature\": 0.5, \"top_k\": 50}``.") + help="Overrides or sets generation config in JSON format. " + "e.g. ``{\"temperature\": 0.5}``. If used with " + "--generation-config=auto, the override parameters will be merged " + "with the default config from the model. If generation-config is " + "None, only the override parameters are used.") parser.add_argument("--enable-sleep-mode", action="store_true",