Skip to content

Commit a4c0509

Browse files
Add integration tests with muxing (#1035)
Replicate the current integration tests but instead of using the specific provider URL, e.g. `/ollama`, use the muxing URL, i.e. `/v1/mux/`. The muxing functionality should take care of routing the request to the correct model and provider. For the moment we're only going to test with the "catch_all" rule, meaning all requests will be directed to the same model. In future iterations we can expand the integration tests to check multiple rules across different providers.
1 parent cac1011 commit a4c0509

File tree

10 files changed

+304
-55
lines changed

10 files changed

+304
-55
lines changed

src/codegate/muxing/adapter.py

Lines changed: 50 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ class BodyAdapter:
3232

3333
def _get_provider_formatted_url(self, model_route: rulematcher.ModelRoute) -> str:
3434
"""Get the provider formatted URL to use in base_url. Note this value comes from DB"""
35-
if model_route.endpoint.provider_type == db_models.ProviderType.openai:
35+
if model_route.endpoint.provider_type in [
36+
db_models.ProviderType.openai,
37+
db_models.ProviderType.vllm,
38+
]:
3639
return urljoin(model_route.endpoint.endpoint, "/v1")
3740
if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
3841
return urljoin(model_route.endpoint.endpoint, "/api/v1")
@@ -90,6 +93,47 @@ def _format_openai(self, chunk: str) -> str:
9093
cleaned_chunk = chunk.split("data:")[1].strip()
9194
return cleaned_chunk
9295

96+
def _format_antropic(self, chunk: str) -> str:
97+
"""
98+
Format the Anthropic chunk to OpenAI format.
99+
100+
This function is used by both chat and FIM formatters
101+
"""
102+
cleaned_chunk = chunk.split("data:")[1].strip()
103+
try:
104+
chunk_dict = json.loads(cleaned_chunk)
105+
msg_type = chunk_dict.get("type", "")
106+
107+
finish_reason = None
108+
if msg_type == "message_stop":
109+
finish_reason = "stop"
110+
111+
# In type == "content_block_start" the content comes in "content_block"
112+
# In type == "content_block_delta" the content comes in "delta"
113+
msg_content_dict = chunk_dict.get("delta", {}) or chunk_dict.get("content_block", {})
114+
# We couldn't obtain the content from the chunk. Skip it.
115+
if not msg_content_dict:
116+
return ""
117+
118+
msg_content = msg_content_dict.get("text", "")
119+
open_ai_chunk = ModelResponse(
120+
id=f"anthropic-chat-{str(uuid.uuid4())}",
121+
model="anthropic-muxed-model",
122+
object="chat.completion.chunk",
123+
choices=[
124+
StreamingChoices(
125+
finish_reason=finish_reason,
126+
index=0,
127+
delta=Delta(content=msg_content, role="assistant"),
128+
logprobs=None,
129+
)
130+
],
131+
)
132+
return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
133+
except Exception as e:
134+
logger.warning(f"Error formatting Anthropic chunk: {chunk}. Error: {e}")
135+
return cleaned_chunk.strip()
136+
93137
def _format_as_openai_chunk(self, formatted_chunk: str) -> str:
94138
"""Format the chunk as OpenAI chunk. This is the format how the clients expect the data."""
95139
chunk_to_send = f"data:{formatted_chunk}\n\n"
@@ -148,6 +192,8 @@ def provider_format_funcs(self) -> Dict[str, Callable]:
148192
db_models.ProviderType.llamacpp: self._format_openai,
149193
# OpenRouter is a dialect of OpenAI
150194
db_models.ProviderType.openrouter: self._format_openai,
195+
# VLLM is a dialect of OpenAI
196+
db_models.ProviderType.vllm: self._format_openai,
151197
}
152198

153199
def _format_ollama(self, chunk: str) -> str:
@@ -165,43 +211,6 @@ def _format_ollama(self, chunk: str) -> str:
165211
logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
166212
return chunk
167213

168-
def _format_antropic(self, chunk: str) -> str:
169-
"""Format the Anthropic chunk to OpenAI format."""
170-
cleaned_chunk = chunk.split("data:")[1].strip()
171-
try:
172-
chunk_dict = json.loads(cleaned_chunk)
173-
msg_type = chunk_dict.get("type", "")
174-
175-
finish_reason = None
176-
if msg_type == "message_stop":
177-
finish_reason = "stop"
178-
179-
# In type == "content_block_start" the content comes in "content_block"
180-
# In type == "content_block_delta" the content comes in "delta"
181-
msg_content_dict = chunk_dict.get("delta", {}) or chunk_dict.get("content_block", {})
182-
# We couldn't obtain the content from the chunk. Skip it.
183-
if not msg_content_dict:
184-
return ""
185-
186-
msg_content = msg_content_dict.get("text", "")
187-
open_ai_chunk = ModelResponse(
188-
id=f"anthropic-chat-{str(uuid.uuid4())}",
189-
model="anthropic-muxed-model",
190-
object="chat.completion.chunk",
191-
choices=[
192-
StreamingChoices(
193-
finish_reason=finish_reason,
194-
index=0,
195-
delta=Delta(content=msg_content, role="assistant"),
196-
logprobs=None,
197-
)
198-
],
199-
)
200-
return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
201-
except Exception as e:
202-
logger.warning(f"Error formatting Anthropic chunk: {chunk}. Error: {e}")
203-
return cleaned_chunk.strip()
204-
205214

206215
class FimStreamChunkFormatter(StreamChunkFormatter):
207216

@@ -218,6 +227,9 @@ def provider_format_funcs(self) -> Dict[str, Callable]:
218227
db_models.ProviderType.llamacpp: self._format_openai,
219228
# OpenRouter is a dialect of OpenAI
220229
db_models.ProviderType.openrouter: self._format_openai,
230+
# VLLM is a dialect of OpenAI
231+
db_models.ProviderType.vllm: self._format_openai,
232+
db_models.ProviderType.anthropic: self._format_antropic,
221233
}
222234

223235
def _format_ollama(self, chunk: str) -> str:

tests/integration/anthropic/testcases.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,31 @@ headers:
22
anthropic:
33
x-api-key: ENV_ANTHROPIC_KEY
44

5+
muxing:
6+
mux_url: http://127.0.0.1:8989/v1/mux/
7+
trimm_from_testcase_url: http://127.0.0.1:8989/anthropic/
8+
provider_endpoint:
9+
url: http://127.0.0.1:8989/api/v1/provider-endpoints
10+
headers:
11+
Content-Type: application/json
12+
data: |
13+
{
14+
"name": "anthropic_muxing",
15+
"description": "Muxing testing endpoint",
16+
"provider_type": "anthropic",
17+
"endpoint": "https://api.anthropic.com/",
18+
"auth_type": "api_key",
19+
"api_key": "ENV_ANTHROPIC_KEY"
20+
}
21+
muxes:
22+
url: http://127.0.0.1:8989/api/v1/workspaces/default/muxes
23+
headers:
24+
Content-Type: application/json
25+
rules:
26+
- model: claude-3-5-haiku-20241022
27+
matcher_type: catch_all
28+
matcher: ""
29+
530
testcases:
631
anthropic_chat:
732
name: Anthropic Chat

tests/integration/integration_tests.py

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import asyncio
2+
import copy
23
import json
34
import os
45
import re
56
import sys
6-
from typing import Dict, Optional, Tuple
7+
from typing import Any, Dict, Optional, Tuple
78

89
import requests
910
import structlog
@@ -21,7 +22,7 @@ def __init__(self):
2122
self.failed_tests = [] # Track failed tests
2223

2324
def call_codegate(
24-
self, url: str, headers: dict, data: dict, provider: str
25+
self, url: str, headers: dict, data: dict, provider: str, method: str = "POST"
2526
) -> Optional[requests.Response]:
2627
logger.debug(f"Creating requester for provider: {provider}")
2728
requester = self.requester_factory.create_requester(provider)
@@ -31,12 +32,12 @@ def call_codegate(
3132
logger.debug(f"Headers: {headers}")
3233
logger.debug(f"Data: {data}")
3334

34-
response = requester.make_request(url, headers, data)
35+
response = requester.make_request(url, headers, data, method=method)
3536

3637
# Enhanced response logging
3738
if response is not None:
3839

39-
if response.status_code != 200:
40+
if response.status_code not in [200, 201, 204]:
4041
logger.debug(f"Response error status: {response.status_code}")
4142
logger.debug(f"Response error headers: {dict(response.headers)}")
4243
try:
@@ -174,7 +175,7 @@ async def run_test(self, test: dict, test_headers: dict) -> bool:
174175

175176
async def _get_testcases(
176177
self, testcases_dict: Dict, test_names: Optional[list[str]] = None
177-
) -> Dict:
178+
) -> Dict[str, Dict[str, str]]:
178179
testcases: Dict[str, Dict[str, str]] = testcases_dict["testcases"]
179180

180181
# Filter testcases by provider and test names
@@ -192,24 +193,102 @@ async def _get_testcases(
192193
testcases = filtered_testcases
193194
return testcases
194195

196+
async def _setup_muxing(
197+
self, provider: str, muxing_config: Optional[Dict]
198+
) -> Optional[Tuple[str, str]]:
199+
"""
200+
Muxing setup. Create the provider endpoints and the muxing rules
201+
202+
Return
203+
"""
204+
# The muxing section was not found in the testcases.yaml file. Nothing to do.
205+
if not muxing_config:
206+
return
207+
208+
# Create the provider endpoint
209+
provider_endpoint = muxing_config.get("provider_endpoint")
210+
try:
211+
data_with_api_keys = self.replace_env_variables(provider_endpoint["data"], os.environ)
212+
response_create_provider = self.call_codegate(
213+
provider=provider,
214+
url=provider_endpoint["url"],
215+
headers=provider_endpoint["headers"],
216+
data=json.loads(data_with_api_keys),
217+
)
218+
created_provider_endpoint = response_create_provider.json()
219+
except Exception as e:
220+
logger.warning(f"Could not setup provider endpoint for muxing: {e}")
221+
return
222+
logger.info("Created provider endpoint for muixing")
223+
224+
muxes_rules: Dict[str, Any] = muxing_config.get("muxes", {})
225+
try:
226+
# We need to first update all the muxes with the provider_id
227+
for mux in muxes_rules.get("rules", []):
228+
mux["provider_id"] = created_provider_endpoint["id"]
229+
230+
# The endpoint actually takes a list
231+
self.call_codegate(
232+
provider=provider,
233+
url=muxes_rules["url"],
234+
headers=muxes_rules["headers"],
235+
data=muxes_rules.get("rules", []),
236+
method="PUT",
237+
)
238+
except Exception as e:
239+
logger.warning(f"Could not setup muxing rules: {e}")
240+
return
241+
logger.info("Created muxing rules")
242+
243+
return muxing_config["mux_url"], muxing_config["trimm_from_testcase_url"]
244+
245+
async def _augment_testcases_with_muxing(
246+
self, testcases: Dict, mux_url: str, trimm_from_testcase_url: str
247+
) -> Dict:
248+
"""
249+
Augment the testcases with the muxing information. Copy the testcases
250+
and execute them through the muxing endpoint.
251+
"""
252+
test_cases_with_muxing = copy.deepcopy(testcases)
253+
for test_id, test_data in testcases.items():
254+
# Replace the provider in the URL with the muxed URL
255+
rest_of_path = test_data["url"].replace(trimm_from_testcase_url, "")
256+
new_url = f"{mux_url}{rest_of_path}"
257+
new_test_data = copy.deepcopy(test_data)
258+
new_test_data["url"] = new_url
259+
new_test_id = f"{test_id}_muxed"
260+
test_cases_with_muxing[new_test_id] = new_test_data
261+
262+
logger.info("Augmented testcases with muxing")
263+
return test_cases_with_muxing
264+
195265
async def _setup(
196-
self, testcases_file: str, test_names: Optional[list[str]] = None
266+
self, testcases_file: str, provider: str, test_names: Optional[list[str]] = None
197267
) -> Tuple[Dict, Dict]:
198268
with open(testcases_file, "r") as f:
199-
testcases_dict = yaml.safe_load(f)
269+
testcases_dict: Dict = yaml.safe_load(f)
200270

201271
headers = testcases_dict["headers"]
202272
testcases = await self._get_testcases(testcases_dict, test_names)
203-
return headers, testcases
273+
muxing_result = await self._setup_muxing(provider, testcases_dict.get("muxing", {}))
274+
# We don't have any muxing setup, return the headers and testcases
275+
if not muxing_result:
276+
return headers, testcases
277+
278+
mux_url, trimm_from_testcase_url = muxing_result
279+
test_cases_with_muxing = await self._augment_testcases_with_muxing(
280+
testcases, mux_url, trimm_from_testcase_url
281+
)
282+
283+
return headers, test_cases_with_muxing
204284

205285
async def run_tests(
206286
self,
207287
testcases_file: str,
208288
provider: str,
209289
test_names: Optional[list[str]] = None,
210290
) -> bool:
211-
headers, testcases = await self._setup(testcases_file, test_names)
212-
291+
headers, testcases = await self._setup(testcases_file, provider, test_names)
213292
if not testcases:
214293
logger.warning(
215294
f"No tests found for provider {provider} in file: {testcases_file} "

tests/integration/llamacpp/testcases.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,30 @@ headers:
22
llamacpp:
33
Content-Type: application/json
44

5+
muxing:
6+
mux_url: http://127.0.0.1:8989/v1/mux/
7+
trimm_from_testcase_url: http://127.0.0.1:8989/llamacpp/
8+
provider_endpoint:
9+
url: http://127.0.0.1:8989/api/v1/provider-endpoints
10+
headers:
11+
Content-Type: application/json
12+
data: |
13+
{
14+
"name": "llamacpp_muxing",
15+
"description": "Muxing testing endpoint",
16+
"provider_type": "llamacpp",
17+
"endpoint": "./codegate_volume/models",
18+
"auth_type": "none"
19+
}
20+
muxes:
21+
url: http://127.0.0.1:8989/api/v1/workspaces/default/muxes
22+
headers:
23+
Content-Type: application/json
24+
rules:
25+
- model: qwen2.5-coder-0.5b-instruct-q5_k_m
26+
matcher_type: catch_all
27+
matcher: ""
28+
529
testcases:
630
llamacpp_chat:
731
name: LlamaCPP Chat

tests/integration/ollama/testcases.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,30 @@ headers:
22
ollama:
33
Content-Type: application/json
44

5+
muxing:
6+
mux_url: http://127.0.0.1:8989/v1/mux/
7+
trimm_from_testcase_url: http://127.0.0.1:8989/ollama/
8+
provider_endpoint:
9+
url: http://127.0.0.1:8989/api/v1/provider-endpoints
10+
headers:
11+
Content-Type: application/json
12+
data: |
13+
{
14+
"name": "ollama_muxing",
15+
"description": "Muxing testing endpoint",
16+
"provider_type": "ollama",
17+
"endpoint": "http://127.0.0.1:11434",
18+
"auth_type": "none"
19+
}
20+
muxes:
21+
url: http://127.0.0.1:8989/api/v1/workspaces/default/muxes
22+
headers:
23+
Content-Type: application/json
24+
rules:
25+
- model: qwen2.5-coder:1.5b
26+
matcher_type: catch_all
27+
matcher: ""
28+
529
testcases:
630
ollama_chat:
731
name: Ollama Chat

0 commit comments

Comments
 (0)