Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 2ece4d7

Browse files
committed
Enable codegate enrichment tests
Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent a4c0509 commit 2ece4d7

File tree

4 files changed

+96
-17
lines changed

4 files changed

+96
-17
lines changed

tests/integration/checks.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def load(test_data: dict) -> List[BaseCheck]:
2929
checks.append(ContainsCheck(test_name))
3030
if test_data.get(DoesNotContainCheck.KEY):
3131
checks.append(DoesNotContainCheck(test_name))
32-
32+
if test_data.get(CodeGateEnrichment.KEY) is not None:
33+
checks.append(CodeGateEnrichment(test_name))
3334
return checks
3435

3536

@@ -51,11 +52,11 @@ async def run_check(self, parsed_response: str, test_data: dict) -> bool:
5152
similarity = await self._calculate_string_similarity(
5253
parsed_response, test_data[DistanceCheck.KEY]
5354
)
55+
logger.info(f"Distance check: - {self.test_name}")
56+
logger.debug(f"Similarity: {similarity}")
57+
logger.debug(f"Response: {parsed_response}")
58+
logger.debug(f"Expected Response: {test_data[DistanceCheck.KEY]}")
5459
if similarity < 0.8:
55-
logger.error(f"Test {self.test_name} failed")
56-
logger.error(f"Similarity: {similarity}")
57-
logger.error(f"Response: {parsed_response}")
58-
logger.error(f"Expected Response: {test_data[DistanceCheck.KEY]}")
5960
return False
6061
return True
6162

@@ -64,10 +65,10 @@ class ContainsCheck(BaseCheck):
6465
KEY = "contains"
6566

6667
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
68+
logger.info(f"Contains check: {self.test_name}")
69+
logger.debug(f"Response: {parsed_response}")
70+
logger.debug(f"Expected Response to contain: {test_data[ContainsCheck.KEY]}")
6771
if test_data[ContainsCheck.KEY].strip() not in parsed_response:
68-
logger.error(f"Test {self.test_name} failed")
69-
logger.error(f"Response: {parsed_response}")
70-
logger.error(f"Expected Response to contain: '{test_data[ContainsCheck.KEY]}'")
7172
return False
7273
return True
7374

@@ -76,11 +77,35 @@ class DoesNotContainCheck(BaseCheck):
7677
KEY = "does_not_contain"
7778

7879
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
80+
logger.info(f"Does not contain check: {self.test_name}")
81+
logger.debug(f"Response: {parsed_response}")
82+
logger.debug(f"Expected Response to not contain: '{test_data[DoesNotContainCheck.KEY]}'")
7983
if test_data[DoesNotContainCheck.KEY].strip() in parsed_response:
80-
logger.error(f"Test {self.test_name} failed")
81-
logger.error(f"Response: {parsed_response}")
82-
logger.error(
83-
f"Expected Response to not contain: '{test_data[DoesNotContainCheck.KEY]}'"
84-
)
8584
return False
8685
return True
86+
87+
88+
class CodeGateEnrichment(BaseCheck):
    """Check whether CodeGate altered the model's answer as the test case expects.

    The response obtained through CodeGate is compared with the response
    obtained by calling the model directly, which the caller is expected to
    store under ``test_data["direct_response"]``. The comparison itself is
    delegated to ``DistanceCheck``.
    """

    KEY = "codegate_enrichment"

    async def run_check(self, parsed_response: str, test_data: dict) -> bool:
        """Compare the CodeGate response with the direct model response.

        Args:
            parsed_response: Response text received through CodeGate.
            test_data: Test case definition. Reads ``"direct_response"`` and
                the optional ``"expect_difference"`` flag (default False)
                nested under ``CodeGateEnrichment.KEY``.

        Returns:
            True when the similarity verdict matches the expectation: the
            responses differ if a difference is expected, and are similar
            otherwise. False when no direct response is available.
        """
        direct_response = test_data.get("direct_response")
        if direct_response is None:
            # Without the raw model output there is nothing to compare against;
            # fail this check instead of raising and aborting the whole run.
            logger.error(
                f"CodeGate enrichment check: no direct response for {self.test_name}"
            )
            return False

        logger.info(f"CodeGate enrichment check: - {self.test_name}")
        logger.debug(f"Response (CodeGate): {parsed_response}")
        logger.debug(f"Response (Raw model): {direct_response}")

        # Use the DistanceCheck to compare the two responses
        distance_check = DistanceCheck(self.test_name)
        are_similar = await distance_check.run_check(
            parsed_response, {DistanceCheck.KEY: direct_response}
        )

        # Check if the response is enriched by CodeGate.
        # If it is, the two responses should no longer be similar.
        # A missing or empty config section means "no difference expected".
        enrichment_config = test_data.get(CodeGateEnrichment.KEY) or {}
        expect_enrichment = enrichment_config.get("expect_difference", False)
        if expect_enrichment:
            logger.info("CodeGate enrichment check: Expecting difference")
            return not are_similar

        # If the response is not enriched, the two responses should be similar.
        logger.info("CodeGate enrichment check: Not expecting difference")
        return are_similar

tests/integration/integration_tests.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,26 @@
99
import requests
1010
import structlog
1111
import yaml
12-
from checks import CheckLoader
12+
from checks import CheckLoader, CodeGateEnrichment
1313
from dotenv import find_dotenv, load_dotenv
1414
from requesters import RequesterFactory
1515

1616
logger = structlog.get_logger("codegate")
1717

1818

19+
# call_directly is a function to call the model directly bypassing codegate
def call_directly(url: str, headers: dict, data: dict) -> Optional[requests.Response]:
    """POST the request payload straight to the model provider.

    Args:
        url: Provider endpoint to send the request to.
        headers: Base request headers. The mapping is copied before the
            ``Content-Type`` header is added, so the caller's dict is not
            modified.
        data: JSON-serialisable request body. Its ``"stream"`` entry
            (default False) decides whether the response is streamed.

    Returns:
        The response on success, or None when the request raised or the
        server answered with an error status (the error is logged).
    """
    try:
        # Work on a copy: callers may reuse the same headers dict for other
        # requests, and it must not pick up the Content-Type set here.
        request_headers = dict(headers or {})
        request_headers["Content-Type"] = "application/json"
        stream = data.get("stream", False)
        # NOTE(review): no timeout is passed, so a stalled provider can block
        # the test run indefinitely - consider adding one.
        response = requests.post(url, headers=request_headers, json=data, stream=stream)
        response.raise_for_status()
        return response
    except Exception as e:
        logger.error(f"Error making direct request to {url}: {str(e)}")
        return None
30+
31+
1932
class CodegateTestRunner:
2033
def __init__(self):
2134
self.requester_factory = RequesterFactory()
@@ -132,18 +145,27 @@ def replacement(match):
132145

133146
async def run_test(self, test: dict, test_headers: dict) -> bool:
134147
test_name = test["name"]
135-
url = test["url"]
136148
data = json.loads(test["data"])
137149
streaming = data.get("stream", False)
138150
provider = test["provider"]
139-
140151
logger.info(f"Starting test: {test_name}")
141152

142-
response = self.call_codegate(url, test_headers, data, provider)
153+
# Call Codegate
154+
response = self.call_codegate(test["url"], test_headers, data, provider)
143155
if not response:
144156
logger.error(f"Test {test_name} failed: No response received")
145157
return False
146158

159+
# Call model directly if specified
160+
direct_response = None
161+
if test.get(CodeGateEnrichment.KEY) is not None:
162+
direct_response = call_directly(
163+
test.get(CodeGateEnrichment.KEY)["provider_url"], test_headers, data
164+
)
165+
if not direct_response:
166+
logger.error(f"Test {test_name} failed: No direct response received")
167+
return False
168+
147169
# Debug response info
148170
logger.debug(f"Response status: {response.status_code}")
149171
logger.debug(f"Response headers: {dict(response.headers)}")
@@ -152,13 +174,23 @@ async def run_test(self, test: dict, test_headers: dict) -> bool:
152174
parsed_response = self.parse_response_message(response, streaming=streaming)
153175
logger.debug(f"Response message: {parsed_response}")
154176

177+
if direct_response:
178+
# Dirty hack to pass direct response to checks
179+
test["direct_response"] = self.parse_response_message(
180+
direct_response, streaming=streaming
181+
)
182+
logger.debug(f"Direct response message: {test['direct_response']}")
183+
155184
# Load appropriate checks for this test
156185
checks = CheckLoader.load(test)
157186

158187
# Run all checks
159188
all_passed = True
160189
for check in checks:
161190
passed_check = await check.run_check(parsed_response, test)
191+
logger.info(
192+
f"Check {check.__class__.__name__} {'passed' if passed_check else 'failed'}"
193+
)
162194
if not passed_check:
163195
all_passed = False
164196

tests/integration/ollama/testcases.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ testcases:
3131
name: Ollama Chat
3232
provider: ollama
3333
url: http://127.0.0.1:8989/ollama/chat/completions
34+
codegate_enrichment:
35+
provider_url: http://127.0.0.1:11434/api/chat
36+
expect_difference: false
3437
data: |
3538
{
3639
"max_tokens":4096,
@@ -55,6 +58,9 @@ testcases:
5558
name: Ollama FIM
5659
provider: ollama
5760
url: http://127.0.0.1:8989/ollama/api/generate
61+
codegate_enrichment:
62+
provider_url: http://127.0.0.1:11434/api/generate
63+
expect_difference: false
5864
data: |
5965
{
6066
"stream": true,
@@ -88,6 +94,9 @@ testcases:
8894
name: Ollama Malicious Package
8995
provider: ollama
9096
url: http://127.0.0.1:8989/ollama/chat/completions
97+
codegate_enrichment:
98+
provider_url: http://127.0.0.1:11434/api/chat
99+
expect_difference: true
91100
data: |
92101
{
93102
"max_tokens":4096,
@@ -112,6 +121,9 @@ testcases:
112121
name: Ollama secret redacting chat
113122
provider: ollama
114123
url: http://127.0.0.1:8989/ollama/chat/completions
124+
codegate_enrichment:
125+
provider_url: http://127.0.0.1:11434/api/chat
126+
expect_difference: true
115127
data: |
116128
{
117129
"max_tokens":4096,

tests/integration/vllm/testcases.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ testcases:
3131
name: VLLM Chat
3232
provider: vllm
3333
url: http://127.0.0.1:8989/vllm/chat/completions
34+
codegate_enrichment:
35+
provider_url: http://127.0.0.1:8000/v1/chat/completions
36+
expect_difference: false
3437
data: |
3538
{
3639
"max_tokens":4096,
@@ -55,6 +58,10 @@ testcases:
5558
name: VLLM FIM
5659
provider: vllm
5760
url: http://127.0.0.1:8989/vllm/completions
61+
# This is commented out for now as there's some issue with parsing the streamed response from the model (on the vllm side, not codegate)
62+
# codegate_enrichment:
63+
# provider_url: http://127.0.0.1:8000/v1/completions
64+
# expect_difference: false
5865
data: |
5966
{
6067
"model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
@@ -84,6 +91,9 @@ testcases:
8491
name: VLLM Malicious Package
8592
provider: vllm
8693
url: http://127.0.0.1:8989/vllm/chat/completions
94+
codegate_enrichment:
95+
provider_url: http://127.0.0.1:8000/v1/chat/completions
96+
expect_difference: true
8797
data: |
8898
{
8999
"max_tokens":4096,

0 commit comments

Comments
 (0)