
Commit 17e949d

rdimitrov authored and Luke Hinds committed
Enable the integration tests for vllm (#806)
* Add integration tests for vllm
  Signed-off-by: Radoslav Dimitrov <[email protected]>
* Try using Qwen/Qwen2.5-Coder-3B-Instruct
  Signed-off-by: Radoslav Dimitrov <[email protected]>
* Go back to using Qwen/Qwen2.5-Coder-0.5B-Instruct
  Signed-off-by: Radoslav Dimitrov <[email protected]>
* Reformat the vllm_fim test
  Signed-off-by: Radoslav Dimitrov <[email protected]>
* Use Qwen/Qwen2.5-Coder-0.5B
  Signed-off-by: Radoslav Dimitrov <[email protected]>
* Revert "Use Qwen/Qwen2.5-Coder-0.5B"
  This reverts commit 32b2d8c.
* Update the expected result for vllm_fim
  Signed-off-by: Radoslav Dimitrov <[email protected]>

---------

Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent ca82d2f commit 17e949d

File tree

4 files changed: +141 -56 lines changed


.github/workflows/integration-tests.yml

Lines changed: 60 additions & 1 deletion
@@ -62,6 +62,7 @@ jobs:
           -v "$(pwd)"/codegate_volume:/app/codegate_volume \
           -e CODEGATE_APP_LOG_LEVEL=DEBUG \
           -e CODEGATE_OLLAMA_URL=http://localhost:11434 \
+          -e CODEGATE_VLLM_URL=http://localhost:8000 \
           --restart unless-stopped $DOCKER_IMAGE
 
           # Confirm the container started
@@ -181,7 +182,60 @@ jobs:
         run: |
           docker logs ollama
 
-      - name: Print the container logs (useful for debugging)
+      - name: Build and run the vllm container
+        run: |
+          git clone https://github.com/vllm-project/vllm.git
+          cd vllm
+          docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
+          docker run -d --name vllm \
+            --network="host" \
+            vllm-cpu-env --model Qwen/Qwen2.5-Coder-0.5B-Instruct
+
+      - name: Verify the vllm container is running
+        run: |
+          echo -e "\nVerify the vllm container is serving\n"
+          docker ps -f name=vllm
+
+          echo "Loop until the endpoint responds successfully"
+          while ! curl --silent --fail --get "http://localhost:8000/ping" >/dev/null; do
+            echo "Ping not available yet. Retrying in 2 seconds..."
+            sleep 2
+          done
+          echo -e "\nPing is now available!\n"
+
+          echo -e "\nVerify the completions endpoint works\n"
+          curl http://localhost:8000/v1/completions -H "Content-Type: application/json" -d '{
+            "model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
+            "prompt": ["How to make pizza"],
+            "max_tokens": 100,
+            "temperature": 0
+          }'
+
+          echo -e "\nVerify the chat/completions endpoint works\n"
+          curl -X POST http://localhost:8000/v1/chat/completions \
+            -H "Content-Type: application/json" \
+            -d '{
+              "model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
+              "messages": [
+                {"role": "system", "content": "You are a coding assistant."},
+                {"role": "user", "content": "Hello"}
+              ],
+              "temperature": 0,
+              "max_tokens": 4096,
+              "extra_body": {}
+            }'
+
+          # Print a new line and then the message in a single echo
+          echo -e "\nPrint the vllm container logs\n"
+          docker logs vllm
+
+      - name: Run integration tests - vllm
+        env:
+          CODEGATE_PROVIDERS: "vllm"
+        run: |
+          poetry run python tests/integration/integration_tests.py
+
+      - name: Print the CodeGate container logs (useful for debugging)
         if: always()
         run: |
           docker logs $CONTAINER_NAME
@@ -194,3 +248,8 @@ jobs:
           echo "DB contents:"
           ls -la codegate_volume/db
           docker exec $CONTAINER_NAME ls -la /app/codegate_volume/db
+
+      - name: Print the vllm container logs (useful for debugging)
+        if: always()
+        run: |
+          docker logs vllm
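For debugging these new steps outside CI, the readiness check and the completions smoke test that the workflow performs with curl can be reproduced from Python. This is a minimal sketch, not part of the commit; it assumes vLLM is serving Qwen/Qwen2.5-Coder-0.5B-Instruct on http://localhost:8000 as configured above, and that the requests library is available locally.

```python
import time

import requests  # assumed available in the local environment

VLLM_URL = "http://localhost:8000"  # matches CODEGATE_VLLM_URL in the workflow
MODEL = "Qwen/Qwen2.5-Coder-0.5B-Instruct"


def wait_for_vllm(timeout: int = 300) -> None:
    """Poll the /ping endpoint until vLLM responds, mirroring the curl loop above."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if requests.get(f"{VLLM_URL}/ping", timeout=2).ok:
                return
        except requests.ConnectionError:
            pass
        time.sleep(2)
    raise RuntimeError("vLLM did not become ready in time")


def smoke_test_completions() -> str:
    """Send the same completions request the workflow issues with curl."""
    resp = requests.post(
        f"{VLLM_URL}/v1/completions",
        json={"model": MODEL, "prompt": ["How to make pizza"], "max_tokens": 100, "temperature": 0},
        timeout=60,
    )
    resp.raise_for_status()
    # OpenAI-style completions response: first choice carries the generated text.
    return resp.json()["choices"][0]["text"]


if __name__ == "__main__":
    wait_for_vllm()
    print(smoke_test_completions())
```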

src/codegate/providers/vllm/adapter.py

Lines changed: 1 addition & 6 deletions
@@ -102,6 +102,7 @@ def _has_chat_ml_format(data: Dict) -> bool:
         content = input_chat_request["messages"][0]["content"]
         if isinstance(content, str) and "<|im_start|>" in content:
             return True
+        return False
 
     def normalize(self, data: Dict) -> ChatCompletionRequest:
         """
@@ -117,12 +118,6 @@ def normalize(self, data: Dict) -> ChatCompletionRequest:
         if not model_name.startswith("hosted_vllm/"):
             normalized_data["model"] = f"hosted_vllm/{model_name}"
 
-        # Ensure the base_url ends with /v1 if provided
-        if "base_url" in normalized_data:
-            base_url = normalized_data["base_url"].rstrip("/")
-            if not base_url.endswith("/v1"):
-                normalized_data["base_url"] = f"{base_url}/v1"
-
         ret_data = normalized_data
         if self._has_chat_ml_format(normalized_data):
             ret_data = self._chat_ml_normalizer.normalize(normalized_data)
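The adapter change is small: `_has_chat_ml_format` now returns False explicitly instead of falling through with None, and the base_url rewriting is dropped here because the provider (next diff) normalizes the URL itself. Below is a standalone sketch of the same detection logic; the sample payloads are hypothetical and used purely for illustration.

```python
from typing import Dict


def has_chat_ml_format(data: Dict) -> bool:
    """Return True when the first message embeds ChatML markers, else False."""
    messages = data.get("messages", [])
    if not messages:
        return False
    content = messages[0].get("content")
    if isinstance(content, str) and "<|im_start|>" in content:
        return True
    return False


# Hypothetical payloads for illustration only.
chatml_request = {"messages": [{"role": "user", "content": "<|im_start|>user\nHello<|im_end|>"}]}
plain_request = {"messages": [{"role": "user", "content": "Hello"}]}

assert has_chat_ml_format(chatml_request) is True
assert has_chat_ml_format(plain_request) is False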
Lines changed: 55 additions & 32 deletions
@@ -1,4 +1,5 @@
 import json
+from urllib.parse import urljoin
 
 import httpx
 import structlog
@@ -31,6 +32,19 @@ def __init__(
     def provider_route_name(self) -> str:
         return "vllm"
 
+    def _get_base_url(self) -> str:
+        """
+        Get the base URL from config with proper formatting
+        """
+        config = Config.get_config()
+        base_url = config.provider_urls.get("vllm") if config else ""
+        if base_url:
+            base_url = base_url.rstrip("/")
+            # Add /v1 if not present
+            if not base_url.endswith("/v1"):
+                base_url = f"{base_url}/v1"
+        return base_url
+
     def models(self):
         resp = httpx.get(f"{self.base_url}/v1/models")
         jsonresp = resp.json()
@@ -40,60 +54,69 @@ def models(self):
     def _setup_routes(self):
         """
         Sets up the /chat/completions route for the provider as expected by the
-        OpenAI API. Extracts the API key from the "Authorization" header and
-        passes it to the completion handler.
+        OpenAI API. Makes the API key optional in the "Authorization" header.
         """
 
         @self.router.get(f"/{self.provider_route_name}/models")
-        async def get_models(authorization: str = Header(..., description="Bearer token")):
-            if not authorization.startswith("Bearer "):
-                raise HTTPException(status_code=401, detail="Invalid authorization header")
-
-            token = authorization.split(" ")[1]
-            config = Config.get_config()
-            if config:
-                base_url = config.provider_urls.get("vllm")
-            else:
-                base_url = ""
-
-            async with httpx.AsyncClient() as client:
-                response = await client.get(
-                    f"{base_url}/v1/models", headers={"Authorization": f"Bearer {token}"}
+        async def get_models(
+            authorization: str | None = Header(None, description="Optional Bearer token")
+        ):
+            base_url = self._get_base_url()
+            headers = {}
+
+            if authorization:
+                if not authorization.startswith("Bearer "):
+                    raise HTTPException(
+                        status_code=401, detail="Invalid authorization header format"
+                    )
+                token = authorization.split(" ")[1]
+                headers["Authorization"] = f"Bearer {token}"
+
+            try:
+                models_url = urljoin(base_url, "v1/models")
+                async with httpx.AsyncClient() as client:
+                    response = await client.get(models_url, headers=headers)
+                    response.raise_for_status()
+                    return response.json()
+            except httpx.HTTPError as e:
+                logger = structlog.get_logger("codegate")
+                logger.error("Error fetching vLLM models", error=str(e))
+                raise HTTPException(
+                    status_code=e.response.status_code if hasattr(e, "response") else 500,
+                    detail=str(e),
                 )
-            response.raise_for_status()
-            return response.json()
 
         @self.router.post(f"/{self.provider_route_name}/chat/completions")
         @self.router.post(f"/{self.provider_route_name}/completions")
         async def create_completion(
             request: Request,
-            authorization: str = Header(..., description="Bearer token"),
+            authorization: str | None = Header(None, description="Optional Bearer token"),
         ):
-            if not authorization.startswith("Bearer "):
-                raise HTTPException(status_code=401, detail="Invalid authorization header")
+            api_key = None
+            if authorization:
+                if not authorization.startswith("Bearer "):
+                    raise HTTPException(
+                        status_code=401, detail="Invalid authorization header format"
+                    )
+                api_key = authorization.split(" ")[1]
 
-            api_key = authorization.split(" ")[1]
             body = await request.body()
             data = json.loads(body)
 
             # Add the vLLM base URL to the request
-            config = Config.get_config()
-            if config:
-                data["base_url"] = config.provider_urls.get("vllm")
-            else:
-                data["base_url"] = ""
+            base_url = self._get_base_url()
+            data["base_url"] = base_url
 
             is_fim_request = self._is_fim_request(request, data)
             try:
+                # Pass the potentially None api_key to complete
                 stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
             except Exception as e:
-                #  check if we have an status code there
+                # Check if we have a status code there
                 if hasattr(e, "status_code"):
                     logger = structlog.get_logger("codegate")
                     logger.error("Error in VLLMProvider completion", error=str(e))
+                    raise HTTPException(status_code=e.status_code, detail=str(e))
+                raise e
 
-                    raise HTTPException(status_code=e.status_code, detail=str(e))  # type: ignore
-                else:
-                    # just continue raising the exception
-                    raise e
             return self._completion_handler.create_response(stream)
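The new `_get_base_url` helper centralizes the URL handling that both routes need: read the configured vLLM URL, strip any trailing slash, and append `/v1` if it is missing. Below is a standalone sketch of that normalization; the function name is illustrative (not from the commit) and the example URL matches the workflow's http://localhost:8000.

```python
from urllib.parse import urljoin


def normalize_vllm_base_url(base_url: str) -> str:
    """Mirror the provider's _get_base_url: trim a trailing slash, ensure a /v1 suffix."""
    if not base_url:
        return ""
    base_url = base_url.rstrip("/")
    if not base_url.endswith("/v1"):
        base_url = f"{base_url}/v1"
    return base_url


assert normalize_vllm_base_url("http://localhost:8000") == "http://localhost:8000/v1"
assert normalize_vllm_base_url("http://localhost:8000/") == "http://localhost:8000/v1"
assert normalize_vllm_base_url("http://localhost:8000/v1") == "http://localhost:8000/v1"

# urljoin against a base that already ends in "/v1" (no trailing slash) replaces the
# last path segment, so the models route still resolves to a single /v1 prefix:
assert urljoin("http://localhost:8000/v1", "v1/models") == "http://localhost:8000/v1/models"
```

This is why `get_models` can build its URL with urljoin while `create_completion` simply places the same normalized value into data["base_url"].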

tests/integration/testcases.yaml

Lines changed: 25 additions & 17 deletions
@@ -1,6 +1,6 @@
 headers:
   vllm:
-    Authorization: Bearer ENV_VLLM_KEY
+    Content-Type: application/json
   openai:
     Authorization: Bearer ENV_OPENAI_KEY
   ollama:
@@ -161,40 +161,48 @@ testcases:
            "role":"user"
          }
        ],
-        "model":"Qwen/Qwen2.5-Coder-14B-Instruct",
+        "model":"Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "stream":true,
        "temperature":0
      }
    likes: |
-      Hello! How can I assist you today? If you have any questions about software security, package analysis, or need guidance on secure coding practices, feel free to ask.
+      Hello! How can I assist you today?
 
  vllm_fim:
    name: VLLM FIM
    provider: vllm
    url: http://127.0.0.1:8989/vllm/completions
    data: |
      {
-        "model": "Qwen/Qwen2.5-Coder-14B",
+        "model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "max_tokens": 4096,
        "temperature": 0,
        "stream": true,
-        "stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```"],
+        "stop": [
+          "<|endoftext|>",
+          "<|fim_prefix|>",
+          "<|fim_middle|>",
+          "<|fim_suffix|>",
+          "<|fim_pad|>",
+          "<|repo_name|>",
+          "<|file_sep|>",
+          "<|im_start|>",
+          "<|im_end|>",
+          "/src/",
+          "#- coding: utf-8",
+          "```"
+        ],
        "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
      }
    likes: |
-      # Create an instance of the InvokeHTTP class
-      invoke = invokehttp.InvokeHTTP(key)
+      return response.json()
 
-      # Call the API using the invoke_http method
-      response = invoke.invoke_http(url, method='get', data=data)
+      def test_call_api():
+          response = call_api('http://localhost:8080', method='post', data='data')
+          assert response['key1'] == 'test1' and response['key2'] == 'test2', "Test failed"
 
-      # Check the response status code
-      if response.status_code == 200:
-          # The API call was successful
-          print(response.json())
-      else:
-          # The API call failed
-          print('Error:', response.status_code)
+      if __name__ == '__main__':
+          test_call_api()
 
  anthropic_chat:
    name: Anthropic Chat
@@ -333,4 +341,4 @@ testcases:
 
      print(response.status_code)
      print(response.json())
-      ```
+      ```
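With the header change above, the vllm testcases now send unauthenticated requests carrying only a Content-Type header, and they target the smaller Qwen/Qwen2.5-Coder-0.5B-Instruct model. Below is a minimal sketch of replaying the vllm_fim testcase by hand against a locally running CodeGate, assumed to listen on 127.0.0.1:8989 as in the testcase URL; the stop list and prompt are abbreviated here, and the harness itself compares the streamed completion against the `likes` block (the expected result this commit updates).

```python
import requests  # assumed available locally

URL = "http://127.0.0.1:8989/vllm/completions"   # from the vllm_fim testcase
HEADERS = {"Content-Type": "application/json"}   # the only vllm header after this change

payload = {
    "model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
    "max_tokens": 4096,
    "temperature": 0,
    "stream": True,
    # Abbreviated stop list; the testcase carries the full set of FIM/ChatML markers.
    "stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>"],
    # Abbreviated prompt; the testcase embeds a longer FIM snippet.
    "prompt": "<|fim_prefix|>\n# codegate/test.py\ndef call_api():\n    <|fim_suffix|>\n<|fim_middle|>",
}

with requests.post(URL, headers=HEADERS, json=payload, stream=True, timeout=120) as resp:
    resp.raise_for_status()
    # Print the streamed chunks; the integration harness instead checks the
    # completion against the expected text in the "likes" block.
    for line in resp.iter_lines():
        if line:
            print(line.decode())
```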
