Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 778e682

Browse files
committed
Enable codegate enrichment tests
Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent cac1011 commit 778e682

File tree

4 files changed

+96
-17
lines changed

4 files changed

+96
-17
lines changed

tests/integration/checks.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def load(test_data: dict) -> List[BaseCheck]:
2929
checks.append(ContainsCheck(test_name))
3030
if test_data.get(DoesNotContainCheck.KEY):
3131
checks.append(DoesNotContainCheck(test_name))
32-
32+
if test_data.get(CodeGateEnrichment.KEY) is not None:
33+
checks.append(CodeGateEnrichment(test_name))
3334
return checks
3435

3536

@@ -51,11 +52,11 @@ async def run_check(self, parsed_response: str, test_data: dict) -> bool:
5152
similarity = await self._calculate_string_similarity(
5253
parsed_response, test_data[DistanceCheck.KEY]
5354
)
55+
logger.info(f"Distance check: - {self.test_name}")
56+
logger.debug(f"Similarity: {similarity}")
57+
logger.debug(f"Response: {parsed_response}")
58+
logger.debug(f"Expected Response: {test_data[DistanceCheck.KEY]}")
5459
if similarity < 0.8:
55-
logger.error(f"Test {self.test_name} failed")
56-
logger.error(f"Similarity: {similarity}")
57-
logger.error(f"Response: {parsed_response}")
58-
logger.error(f"Expected Response: {test_data[DistanceCheck.KEY]}")
5960
return False
6061
return True
6162

@@ -64,10 +65,10 @@ class ContainsCheck(BaseCheck):
6465
KEY = "contains"
6566

6667
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
68+
logger.info(f"Contains check: {self.test_name}")
69+
logger.debug(f"Response: {parsed_response}")
70+
logger.debug(f"Expected Response to contain: {test_data[ContainsCheck.KEY]}")
6771
if test_data[ContainsCheck.KEY].strip() not in parsed_response:
68-
logger.error(f"Test {self.test_name} failed")
69-
logger.error(f"Response: {parsed_response}")
70-
logger.error(f"Expected Response to contain: '{test_data[ContainsCheck.KEY]}'")
7172
return False
7273
return True
7374

@@ -76,11 +77,35 @@ class DoesNotContainCheck(BaseCheck):
7677
KEY = "does_not_contain"
7778

7879
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
80+
logger.info(f"Does not contain check: {self.test_name}")
81+
logger.debug(f"Response: {parsed_response}")
82+
logger.debug(f"Expected Response to not contain: '{test_data[DoesNotContainCheck.KEY]}'")
7983
if test_data[DoesNotContainCheck.KEY].strip() in parsed_response:
80-
logger.error(f"Test {self.test_name} failed")
81-
logger.error(f"Response: {parsed_response}")
82-
logger.error(
83-
f"Expected Response to not contain: '{test_data[DoesNotContainCheck.KEY]}'"
84-
)
8584
return False
8685
return True
86+
87+
88+
class CodeGateEnrichment(BaseCheck):
    """Check whether CodeGate changed the model's answer.

    The response obtained through CodeGate is compared with the response
    obtained by calling the model directly, reusing ``DistanceCheck`` for the
    comparison. The ``expect_difference`` flag of the ``codegate_enrichment``
    test entry states whether the two responses are expected to differ
    (CodeGate enriched the answer) or to be similar (no enrichment).
    """

    KEY = "codegate_enrichment"

    async def run_check(self, parsed_response: str, test_data: dict) -> bool:
        """Compare the CodeGate response with the direct model response.

        Args:
            parsed_response: Response message obtained through CodeGate.
            test_data: Test case definition. It is expected to carry the
                direct model response under ``"direct_response"`` and the
                enrichment settings under ``codegate_enrichment``.

        Returns:
            ``True`` when the outcome matches the expectation: the responses
            are dissimilar and ``expect_difference`` is truthy, or they are
            similar and it is falsy/absent. ``False`` otherwise, including
            when no direct response is available to compare against.
        """
        logger.info(f"CodeGate enrichment check: - {self.test_name}")

        # Without a direct response there is nothing to compare against;
        # report a failed check instead of raising KeyError.
        direct_response = test_data.get("direct_response")
        if direct_response is None:
            logger.error(
                f"CodeGate enrichment check: no direct response for {self.test_name}"
            )
            return False

        logger.debug(f"Response (CodeGate): {parsed_response}")
        logger.debug(f"Response (Raw model): {direct_response}")

        # Use the DistanceCheck to compare the two responses
        distance_check = DistanceCheck(self.test_name)
        are_similar = await distance_check.run_check(
            parsed_response, {DistanceCheck.KEY: direct_response}
        )

        # The settings may be something other than a mapping (for example a
        # bare `codegate_enrichment: true`); fall back to "no difference
        # expected" in that case rather than failing on `.get`.
        settings = test_data.get(CodeGateEnrichment.KEY)
        expect_enrichment = (
            settings.get("expect_difference", False) if isinstance(settings, dict) else False
        )

        # If CodeGate enriched the response, it should no longer be similar
        # to what the model returns on its own.
        if expect_enrichment:
            logger.info("CodeGate enrichment check: Expecting difference")
            return not are_similar

        # Without enrichment, both responses should be similar.
        logger.info("CodeGate enrichment check: Not expecting difference")
        return are_similar

tests/integration/integration_tests.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,26 @@
88
import requests
99
import structlog
1010
import yaml
11-
from checks import CheckLoader
11+
from checks import CheckLoader, CodeGateEnrichment
1212
from dotenv import find_dotenv, load_dotenv
1313
from requesters import RequesterFactory
1414

1515
logger = structlog.get_logger("codegate")
1616

1717

18+
# call_directly calls the model provider directly, bypassing CodeGate.
def call_directly(
    url: str, headers: dict, data: dict, timeout: Optional[float] = None
) -> Optional[requests.Response]:
    """POST ``data`` as JSON to ``url`` and return the response.

    Args:
        url: Endpoint of the model provider to call.
        headers: Headers to send. The dict is not modified; a copy with
            ``Content-Type: application/json`` is sent instead.
        data: Request payload. Its ``"stream"`` entry (default ``False``)
            decides whether the response is streamed.
        timeout: Optional timeout in seconds handed to ``requests.post``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The response on success, or ``None`` when the request fails or the
        server answers with an error status (the error is logged).
    """
    try:
        # Build a fresh dict so the caller's headers are left untouched.
        request_headers = {**(headers or {}), "Content-Type": "application/json"}
        stream = data.get("stream", False)
        response = requests.post(
            url, headers=request_headers, json=data, stream=stream, timeout=timeout
        )
        response.raise_for_status()
        return response
    except Exception as e:
        # Best-effort helper: any failure is logged and reported as None.
        logger.error(f"Error making direct request to {url}: {str(e)}")
        return None
29+
30+
1831
class CodegateTestRunner:
1932
def __init__(self):
2033
self.requester_factory = RequesterFactory()
@@ -131,18 +144,27 @@ def replacement(match):
131144

132145
async def run_test(self, test: dict, test_headers: dict) -> bool:
133146
test_name = test["name"]
134-
url = test["url"]
135147
data = json.loads(test["data"])
136148
streaming = data.get("stream", False)
137149
provider = test["provider"]
138-
139150
logger.info(f"Starting test: {test_name}")
140151

141-
response = self.call_codegate(url, test_headers, data, provider)
152+
# Call Codegate
153+
response = self.call_codegate(test["url"], test_headers, data, provider)
142154
if not response:
143155
logger.error(f"Test {test_name} failed: No response received")
144156
return False
145157

158+
# Call model directly if specified
159+
direct_response = None
160+
if test.get(CodeGateEnrichment.KEY) is not None:
161+
direct_response = call_directly(
162+
test.get(CodeGateEnrichment.KEY)["provider_url"], test_headers, data
163+
)
164+
if not direct_response:
165+
logger.error(f"Test {test_name} failed: No direct response received")
166+
return False
167+
146168
# Debug response info
147169
logger.debug(f"Response status: {response.status_code}")
148170
logger.debug(f"Response headers: {dict(response.headers)}")
@@ -151,13 +173,23 @@ async def run_test(self, test: dict, test_headers: dict) -> bool:
151173
parsed_response = self.parse_response_message(response, streaming=streaming)
152174
logger.debug(f"Response message: {parsed_response}")
153175

176+
if direct_response:
177+
# Dirty hack to pass direct response to checks
178+
test["direct_response"] = self.parse_response_message(
179+
direct_response, streaming=streaming
180+
)
181+
logger.debug(f"Direct response message: {test['direct_response']}")
182+
154183
# Load appropriate checks for this test
155184
checks = CheckLoader.load(test)
156185

157186
# Run all checks
158187
all_passed = True
159188
for check in checks:
160189
passed_check = await check.run_check(parsed_response, test)
190+
logger.info(
191+
f"Check {check.__class__.__name__} {'passed' if passed_check else 'failed'}"
192+
)
161193
if not passed_check:
162194
all_passed = False
163195

tests/integration/ollama/testcases.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ testcases:
77
name: Ollama Chat
88
provider: ollama
99
url: http://127.0.0.1:8989/ollama/chat/completions
10+
codegate_enrichment:
11+
provider_url: http://127.0.0.1:11434/api/chat
12+
expect_difference: false
1013
data: |
1114
{
1215
"max_tokens":4096,
@@ -31,6 +34,9 @@ testcases:
3134
name: Ollama FIM
3235
provider: ollama
3336
url: http://127.0.0.1:8989/ollama/api/generate
37+
codegate_enrichment:
38+
provider_url: http://127.0.0.1:11434/api/generate
39+
expect_difference: false
3440
data: |
3541
{
3642
"stream": true,
@@ -64,6 +70,9 @@ testcases:
6470
name: Ollama Malicious Package
6571
provider: ollama
6672
url: http://127.0.0.1:8989/ollama/chat/completions
73+
codegate_enrichment:
74+
provider_url: http://127.0.0.1:11434/api/chat
75+
expect_difference: true
6776
data: |
6877
{
6978
"max_tokens":4096,
@@ -88,6 +97,9 @@ testcases:
8897
name: Ollama secret redacting chat
8998
provider: ollama
9099
url: http://127.0.0.1:8989/ollama/chat/completions
100+
codegate_enrichment:
101+
provider_url: http://127.0.0.1:11434/api/chat
102+
expect_difference: true
91103
data: |
92104
{
93105
"max_tokens":4096,

tests/integration/vllm/testcases.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ testcases:
77
name: VLLM Chat
88
provider: vllm
99
url: http://127.0.0.1:8989/vllm/chat/completions
10+
codegate_enrichment:
11+
provider_url: http://127.0.0.1:8000/v1/chat/completions
12+
expect_difference: false
1013
data: |
1114
{
1215
"max_tokens":4096,
@@ -31,6 +34,10 @@ testcases:
3134
name: VLLM FIM
3235
provider: vllm
3336
url: http://127.0.0.1:8989/vllm/completions
37+
# Disabled for now: parsing the streamed response from the model fails (an issue on the vLLM side, not in CodeGate)
38+
# codegate_enrichment:
39+
# provider_url: http://127.0.0.1:8000/v1/completions
40+
# expect_difference: false
3441
data: |
3542
{
3643
"model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
@@ -60,6 +67,9 @@ testcases:
6067
name: VLLM Malicious Package
6168
provider: vllm
6269
url: http://127.0.0.1:8989/vllm/chat/completions
70+
codegate_enrichment:
71+
provider_url: http://127.0.0.1:8000/v1/chat/completions
72+
expect_difference: true
6373
data: |
6474
{
6575
"max_tokens":4096,

0 commit comments

Comments
 (0)