From d3499be3d55f10e6716c894467b394e421fc292d Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Tue, 7 Jan 2025 16:05:33 -0300 Subject: [PATCH 1/6] treat do_lower_case in the same way as the sentence-transformers library Signed-off-by: Max de Bayser --- vllm/entrypoints/openai/serving_engine.py | 5 +++++ vllm/transformers_utils/tokenizer_group/__init__.py | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 319f86924003..88859255f202 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -160,6 +160,11 @@ def _normalize_prompt_text_to_input( truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]], add_special_tokens: bool, ) -> TextTokensPrompt: + if (self.model_config.encoder_config is not None + and self.model_config.encoder_config.get( + "do_lower_case", False)): + prompt = prompt.lower() + if truncate_prompt_tokens is None: encoded = tokenizer(prompt, add_special_tokens=add_special_tokens) else: diff --git a/vllm/transformers_utils/tokenizer_group/__init__.py b/vllm/transformers_utils/tokenizer_group/__init__.py index c0b3d2585a96..d40027679699 100644 --- a/vllm/transformers_utils/tokenizer_group/__init__.py +++ b/vllm/transformers_utils/tokenizer_group/__init__.py @@ -26,11 +26,6 @@ def init_tokenizer_from_configs(model_config: ModelConfig, trust_remote_code=model_config.trust_remote_code, revision=model_config.tokenizer_revision) - if (model_config.encoder_config is not None - and "do_lower_case" in model_config.encoder_config): - init_kwargs["do_lower_case"] = model_config.encoder_config[ - "do_lower_case"] - return get_tokenizer_group(parallel_config.tokenizer_pool_config, **init_kwargs) From b0c4f53dba1a9b0e5a0f06c61757713f300c2b47 Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Wed, 8 Jan 2025 10:01:29 -0300 Subject: [PATCH 2/6] fix the LLM interface too Signed-off-by: Max de Bayser --- vllm/inputs/preprocess.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index b362ee0cac32..0e7aeed7db5d 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -190,6 +190,12 @@ def _tokenize_prompt( # on the task and language of their request. Also needed to avoid # appending an EOS token to the prompt which disrupts generation. add_special_tokens = False + + if (self.model_config.encoder_config is not None + and self.model_config.encoder_config.get( + "do_lower_case", False)): + prompt = prompt.lower() + return tokenizer.encode(request_id=request_id, prompt=prompt, lora_request=lora_request, From 84971b2213cb9a1e314cbea71f1b70cb549173dd Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Wed, 8 Jan 2025 11:18:45 -0300 Subject: [PATCH 3/6] add sentence-transformers/all-MiniLM-L12-v2 test Signed-off-by: Max de Bayser --- tests/models/embedding/language/test_embedding.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/models/embedding/language/test_embedding.py b/tests/models/embedding/language/test_embedding.py index f458ef5ef556..94a2e4307ffd 100644 --- a/tests/models/embedding/language/test_embedding.py +++ b/tests/models/embedding/language/test_embedding.py @@ -15,6 +15,8 @@ # [Encoder-only] pytest.param("BAAI/bge-base-en-v1.5", marks=[pytest.mark.core_model, pytest.mark.cpu_model]), + pytest.param("sentence-transformers/all-MiniLM-L12-v2", + marks=[pytest.mark.core_model, pytest.mark.cpu_model]), pytest.param("intfloat/multilingual-e5-large"), # [Encoder-decoder] pytest.param("intfloat/e5-mistral-7b-instruct", From f26c4e4b7ae9e3f774758ac34c044b689a6f7b90 Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Wed, 8 Jan 2025 11:40:00 -0300 Subject: [PATCH 4/6] make model non-core non-cpu Signed-off-by: Max de Bayser --- tests/models/embedding/language/test_embedding.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/models/embedding/language/test_embedding.py b/tests/models/embedding/language/test_embedding.py index 94a2e4307ffd..7749806548cd 100644 --- a/tests/models/embedding/language/test_embedding.py +++ b/tests/models/embedding/language/test_embedding.py @@ -15,8 +15,7 @@ # [Encoder-only] pytest.param("BAAI/bge-base-en-v1.5", marks=[pytest.mark.core_model, pytest.mark.cpu_model]), - pytest.param("sentence-transformers/all-MiniLM-L12-v2", - marks=[pytest.mark.core_model, pytest.mark.cpu_model]), + pytest.param("sentence-transformers/all-MiniLM-L12-v2"), pytest.param("intfloat/multilingual-e5-large"), # [Encoder-decoder] pytest.param("intfloat/e5-mistral-7b-instruct", From 84b9bd4dcf3f995484be490f1260b6b8908ff9c9 Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Wed, 8 Jan 2025 17:04:01 -0300 Subject: [PATCH 5/6] fix broken test Signed-off-by: Max de Bayser --- tests/entrypoints/openai/test_serving_chat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index 97248f115097..f431d1065e0e 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -35,6 +35,7 @@ class MockModelConfig: logits_processor_pattern = None diff_sampling_param: Optional[dict] = None allowed_local_media_path: str = "" + encoder_config = None def get_diff_sampling_param(self): return self.diff_sampling_param or {} From b6e9948d2d31822e407ab63768f7628565ec8e24 Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Wed, 8 Jan 2025 20:08:50 -0300 Subject: [PATCH 6/6] trigger ci Signed-off-by: Max de Bayser