18 changes: 18 additions & 0 deletions examples/offline_inference/vision_language.py
@@ -464,6 +464,23 @@ def run_nvlm_d(questions: list[str], modality: str):
    return llm, prompts, stop_token_ids


# Ovis
def run_ovis(questions: list[str], modality: str):
    assert modality == "image"

    model_name = "AIDC-AI/Ovis1.6-Llama3.2-3B"

    llm = LLM(
        model=model_name,
        dtype="float16",
        max_model_len=8192,
        trust_remote_code=True,
    )

    prompts = [f"<image>\n{question}" for question in questions]
    stop_token_ids = [1, 107]
    return llm, prompts, stop_token_ids


# PaliGemma
def run_paligemma(question: str, modality: str):
    assert modality == "image"

@@ -678,6 +695,7 @@ def run_qwen2_5_vl(questions: list[str], modality: str):
"mllama": run_mllama,
"molmo": run_molmo,
"NVLM_D": run_nvlm_d,
"ovis": run_ovis,
"paligemma": run_paligemma,
"paligemma2": run_paligemma2,
"phi3_v": run_phi3v,
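For context, every `run_*` helper in `vision_language.py` follows the same contract: it takes the list of questions plus the modality and returns the `LLM` engine, the formatted prompts, and optional stop-token IDs, which a shared driver then feeds to `llm.generate`. The sketch below is a hypothetical, minimal version of that consumption path, not code from this diff; `SamplingParams`, `ImageAsset`, and the dict-shaped generate input are vLLM's public API, while the question text and sampling values are made up.

```python
from vllm import SamplingParams
from vllm.assets.image import ImageAsset

# Hypothetical driver, mirroring how the example script consumes the
# (llm, prompts, stop_token_ids) tuple returned by run_ovis.
llm, prompts, stop_token_ids = run_ovis(["What is in this image?"], "image")

sampling_params = SamplingParams(
    temperature=0.2,
    max_tokens=64,
    stop_token_ids=stop_token_ids,  # [1, 107] for Ovis, per this diff
)

image = ImageAsset("cherry_blossom").pil_image
outputs = llm.generate(
    [{"prompt": prompts[0], "multi_modal_data": {"image": image}}],
    sampling_params=sampling_params,
)
print(outputs[0].outputs[0].text)
```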
20 changes: 19 additions & 1 deletion examples/offline_inference/vision_language_multi_image.py
@@ -233,9 +233,26 @@
        prompt=prompt,
        stop_token_ids=stop_token_ids,
        image_data=[fetch_image(url) for url in image_urls],
        chat_template=None,
    )


def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
    model_name = "AIDC-AI/Ovis1.6-Gemma2-9B"

    llm = LLM(
        model=model_name,
        max_model_len=8192,
        dtype="bfloat16",
        # Allow more than one image per prompt, matching the other
        # loaders in this multi-image example script.
        limit_mm_per_prompt={"image": len(image_urls)},
        trust_remote_code=True,
    )

    prompt = f"<image>\n{question}"
    stop_token_ids = [1, 107]

    return ModelRequestData(
        llm=llm,
        prompt=prompt,
        stop_token_ids=stop_token_ids,
        image_data=[fetch_image(url) for url in image_urls],
        chat_template=None,
    )


def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "mistral-community/pixtral-12b"
@@ -501,6 +518,7 @@
"internvl_chat": load_internvl,
"mllama": load_mllama,
"NVLM_D": load_nvlm_d,
"ovis": load_ovis,
"phi3_v": load_phi3v,
"phi4_mm": load_phi4mm,
"pixtral_hf": load_pixtral_hf,
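The multi-image script likewise funnels every `load_*` result through one shared generate call. Below is a minimal sketch of that path for the new `load_ovis`, assuming only the `ModelRequestData` fields visible in this diff (`llm`, `prompt`, `stop_token_ids`, `image_data`); the image URLs and sampling settings are placeholders, not values from the PR.

```python
from vllm import SamplingParams

# Placeholder URLs; any two fetchable images would do.
IMAGE_URLS = [
    "https://example.com/a.jpg",
    "https://example.com/b.jpg",
]

req = load_ovis("What do these images have in common?", IMAGE_URLS)

# A list of PIL images goes in as one multi-modal input; this works
# because load_ovis caps images per prompt via limit_mm_per_prompt.
outputs = req.llm.generate(
    [{
        "prompt": req.prompt,
        "multi_modal_data": {"image": req.image_data},
    }],
    sampling_params=SamplingParams(
        max_tokens=64,
        stop_token_ids=req.stop_token_ids,
    ),
)
print(outputs[0].outputs[0].text)
```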
1 change: 1 addition & 0 deletions tests/models/registry.py
@@ -268,6 +268,7 @@ def check_available_online(
trust_remote_code=True),
"NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B",
trust_remote_code=True),
"Ovis": _HfExamplesInfo("AIDC-AI/Ovis1.6-Llama3.2-3B",trust_remote_code=True), # noqa: E501
"PaliGemmaForConditionalGeneration": _HfExamplesInfo("google/paligemma-3b-mix-224", # noqa: E501
extras={"v2": "google/paligemma2-3b-ft-docci-448"}), # noqa: E501
"Phi3VForCausalLM": _HfExamplesInfo("microsoft/Phi-3-vision-128k-instruct",
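Finally, the `registry.py` entry is what lets the model tests pick Ovis up at all: `check_available_online` walks these `_HfExamplesInfo` records so CI can confirm each registered architecture points at a reachable Hugging Face checkpoint. A hypothetical sanity check using only what this diff shows; the `default` field name is an assumption about `_HfExamplesInfo`, not something the diff confirms.

```python
# Hypothetical sanity check; `default` as the field holding the
# checkpoint id is an assumption, not shown in this diff.
info = _HfExamplesInfo("AIDC-AI/Ovis1.6-Llama3.2-3B", trust_remote_code=True)
assert info.default == "AIDC-AI/Ovis1.6-Llama3.2-3B"
assert info.trust_remote_code
```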