-
-
Notifications
You must be signed in to change notification settings - Fork 10.6k
Closed
Labels
bug: Something isn't working
Description
Your current environment
The output of `python collect_env.py`
🐛 Describe the bug
Running with `tensor_parallel_size=1` works fine, but an error is raised when `tensor_parallel_size` is greater than 1.
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
from vllm import LLM, SamplingParams
# Reproduction script: send one image-plus-question request to the model at
# `model_path` through vLLM, with tensor_parallel_size=2.
model_path = "/home/work/MiniCPM-Llama3-V-2_5"

# Open the input picture and convert it to RGB mode.
image = Image.open('x.jpg').convert('RGB')

# tensor_parallel_size=2 is the configuration under which the failure occurs;
# the same script with tensor_parallel_size=1 is reported to work.
engine_options = dict(
    model=model_path,
    trust_remote_code=True,
    tensor_parallel_size=2,
)
llm = LLM(**engine_options)

question = 'What is in the image?'
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Single user turn; the image placeholder precedes the question text.
user_turn = {
    'role': 'user',
    'content': f'(<image>./</image>)\n{question}',
}
messages = [user_turn]

# Render the conversation to a plain string (tokenize=False) that ends with
# the generation prompt.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# NOTE(review): 128001 / 128009 are presumably the model's end-of-text and
# end-of-turn token ids -- confirm against the tokenizer.
sampling_params = SamplingParams(
    temperature=0.7,
    max_tokens=512,
    stop_token_ids=[128001, 128009],
)

# The image is passed alongside the text prompt under "multi_modal_data".
multi_modal_data = {"image": image}
inputs = {"prompt": prompt, "multi_modal_data": multi_modal_data}

outputs = llm.generate(inputs, sampling_params=sampling_params)

# Print the text of the first completion of every returned result.
for request_output in outputs:
    generated_text = request_output.outputs[0].text
    print(generated_text)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/executor/multiproc_worker_utils.py", line 223, in _run_worker_process
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] output = executor(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return func(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/worker/worker_base.py", line 65, in start_worker_execution_loop
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] output = self.execute_model(execute_model_req=None)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/worker/worker_base.py", line 272, in execute_model
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] output = self.model_runner.execute_model(
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return func(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/worker/model_runner.py", line 1354, in execute_model
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] hidden_or_intermediate_states = model_executable(
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return self._call_impl(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return forward_call(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/model_executor/models/minicpmv.py", line 619, in forward
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] vlm_embeddings, vision_hidden_states = self.get_embedding(inputs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/model_executor/models/minicpmv.py", line 562, in get_embedding
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] vision_hidden_states = self.get_vision_hidden_states(data)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/home/work/gitclone/vllm-main/vllm/model_executor/models/minicpmv.py", line 545, in get_vision_hidden_states
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] vision_embedding = self.vpm(
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return self._call_impl(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return forward_call(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/transformers/models/idefics2/modeling_idefics2.py", line 617, in forward
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] hidden_states = self.embeddings(pixel_values=pixel_values, patch_attention_mask=patch_attention_mask)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return self._call_impl(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return forward_call(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/transformers/models/idefics2/modeling_idefics2.py", line 162, in forward
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] patch_embeds = self.patch_embedding(pixel_values)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return self._call_impl(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return forward_call(*args, **kwargs)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 460, in forward
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return self._conv_forward(input, self.weight, self.bias)
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 456, in _conv_forward
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] return F.conv2d(input, weight, bias, self.stride,
(VllmWorkerProcess pid=11819) ERROR 07-30 20:15:19 multiproc_worker_utils.py:226] RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1! (when checking argument for argument weight in method wrapper_CUDA__cudnn_convolution)
Metadata
Metadata
Assignees
Labels
bug: Something isn't working