Description
The model to consider.
```python
from typing import List
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from PIL import Image
from vllm import LLM, SamplingParams
import os
import uvicorn
import time

app = FastAPI()


class InferenceRequest(BaseModel):
    model: str
    question: str
    image_paths: List[str]


# Initialize models once during application startup
models = {}


def load_image_from_path(image_path: str) -> Image.Image:
    """Load a PIL image from a local file path."""
    if not os.path.isfile(image_path):
        raise ValueError(f"File {image_path} does not exist.")
    return Image.open(image_path).convert("RGB")


def load_phi3v():
    """Load the Phi-3.5-vision model and return the LLM instance."""
    return LLM(
        model="microsoft/Phi-3.5-vision-instruct",
        trust_remote_code=True,
        max_model_len=4096,
        # Note: this caps the engine at one image per prompt, even though the
        # /inference endpoint accepts a list of image paths.
        limit_mm_per_prompt={"image": 1},
    )


def initialize_models():
    """Initialize all models required for inference."""
    global models
    models["phi3_v"] = load_phi3v()


def load_phi3v_prompt(question: str, image_paths: List[str]):
    """Build the Phi-3.5-vision chat prompt with one <|image_i|> placeholder per image."""
    placeholders = "\n".join(f"<|image_{i}|>" for i, _ in enumerate(image_paths, start=1))
    prompt = f"<|user|>\n{placeholders}\n{question}<|end|>\n<|assistant|>\n"
    stop_token_ids = None
    return prompt, stop_token_ids


def run_generate(model_name: str, question: str, image_paths: List[str]):
    if model_name not in models:
        raise ValueError(f"Model {model_name} is not loaded.")
    llm = models[model_name]
    prompt, stop_token_ids = load_phi3v_prompt(question, image_paths)
    image_data = [load_image_from_path(path) for path in image_paths]
    sampling_params = SamplingParams(temperature=0.0, max_tokens=128, stop_token_ids=stop_token_ids)
    outputs = llm.generate(
        {
            "prompt": prompt,
            "multi_modal_data": {"image": image_data},
        },
        sampling_params=sampling_params,
    )
    return [o.outputs[0].text for o in outputs]


@app.on_event("startup")
async def startup_event():
    initialize_models()


@app.post("/inference")
async def inference(request: InferenceRequest):
    try:
        start_time = time.time()
        result = run_generate(request.model, request.question, request.image_paths)
        end_time = time.time()
        print("total time taken", end_time - start_time)
        return {"results": result}
    except ValueError as ve:
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8002)
```
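For reference, a request to this server looks roughly like the sketch below; the host, port, and image path are placeholders, and the `model` field must match the `"phi3_v"` key registered in `initialize_models`:

```python
import requests

# Example call against the /inference endpoint above. The image path is a
# placeholder and must exist on the server's filesystem, since the server
# loads the file locally with PIL.
payload = {
    "model": "phi3_v",
    "question": "What is shown in this image?",
    "image_paths": ["/path/to/image.jpg"],
}
resp = requests.post("http://localhost:8002/inference", json=payload, timeout=300)
print(resp.json())
```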
The closest model vllm already supports.
The Phi-3.5-vision-instruct model itself is already supported by vLLM; what I need is a reference for running it quantized.
What's your difficulty of supporting the model you want?
The documentation does not contain any information about quantization for the Phi-3.5-vision-instruct model.
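What I am looking for is something like the sketch below. This is an untested assumption on my part: it reuses vLLM's generic `quantization` argument (here `"fp8"`), but the docs do not confirm whether any quantization method is actually supported for this vision model, so a pointer to a working configuration or a pre-quantized checkpoint would be equally helpful.

```python
from vllm import LLM

# Untested sketch: assumes vLLM's generic quantization flag also applies to this
# vision model. "fp8" is one of vLLM's built-in options; loading a pre-quantized
# AWQ/GPTQ checkpoint as `model` would be another route, if one exists.
llm = LLM(
    model="microsoft/Phi-3.5-vision-instruct",
    trust_remote_code=True,
    max_model_len=4096,
    limit_mm_per_prompt={"image": 1},
    quantization="fp8",  # assumption: not confirmed for Phi-3.5-vision-instruct
)
```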
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.