Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 2da3a74

Browse files
committed
Enable codegate enrichment tests
Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent a4c0509 commit 2da3a74

File tree

4 files changed

+93
-17
lines changed

4 files changed

+93
-17
lines changed

tests/integration/checks.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def load(test_data: dict) -> List[BaseCheck]:
2929
checks.append(ContainsCheck(test_name))
3030
if test_data.get(DoesNotContainCheck.KEY):
3131
checks.append(DoesNotContainCheck(test_name))
32-
32+
if test_data.get(CodeGateEnrichment.KEY) is not None:
33+
checks.append(CodeGateEnrichment(test_name))
3334
return checks
3435

3536

@@ -51,11 +52,10 @@ async def run_check(self, parsed_response: str, test_data: dict) -> bool:
5152
similarity = await self._calculate_string_similarity(
5253
parsed_response, test_data[DistanceCheck.KEY]
5354
)
55+
logger.debug(f"Similarity: {similarity}")
56+
logger.debug(f"Response: {parsed_response}")
57+
logger.debug(f"Expected Response: {test_data[DistanceCheck.KEY]}")
5458
if similarity < 0.8:
55-
logger.error(f"Test {self.test_name} failed")
56-
logger.error(f"Similarity: {similarity}")
57-
logger.error(f"Response: {parsed_response}")
58-
logger.error(f"Expected Response: {test_data[DistanceCheck.KEY]}")
5959
return False
6060
return True
6161

@@ -64,10 +64,9 @@ class ContainsCheck(BaseCheck):
6464
KEY = "contains"
6565

6666
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
67+
logger.debug(f"Response: {parsed_response}")
68+
logger.debug(f"Expected Response to contain: {test_data[ContainsCheck.KEY]}")
6769
if test_data[ContainsCheck.KEY].strip() not in parsed_response:
68-
logger.error(f"Test {self.test_name} failed")
69-
logger.error(f"Response: {parsed_response}")
70-
logger.error(f"Expected Response to contain: '{test_data[ContainsCheck.KEY]}'")
7170
return False
7271
return True
7372

@@ -76,11 +75,33 @@ class DoesNotContainCheck(BaseCheck):
7675
KEY = "does_not_contain"
7776

7877
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
78+
logger.debug(f"Response: {parsed_response}")
79+
logger.debug(f"Expected Response to not contain: '{test_data[DoesNotContainCheck.KEY]}'")
7980
if test_data[DoesNotContainCheck.KEY].strip() in parsed_response:
80-
logger.error(f"Test {self.test_name} failed")
81-
logger.error(f"Response: {parsed_response}")
82-
logger.error(
83-
f"Expected Response to not contain: '{test_data[DoesNotContainCheck.KEY]}'"
84-
)
8581
return False
8682
return True
83+
84+
85+
class CodeGateEnrichment(BaseCheck):
    """Check whether CodeGate altered the model's answer as the test expects.

    The response obtained through CodeGate is compared with the response
    obtained by calling the model directly, using ``DistanceCheck``. The test
    case states under ``codegate_enrichment.expect_difference`` whether the
    two responses are expected to differ.
    """

    KEY = "codegate_enrichment"

    async def run_check(self, parsed_response: str, test_data: dict) -> bool:
        """Compare the CodeGate response against the direct model response.

        Args:
            parsed_response: Response message received through CodeGate.
            test_data: Test case definition. The parsed direct response is
                read from ``"direct_response"`` and the check settings from
                ``CodeGateEnrichment.KEY``.

        Returns:
            True if the responses differ when a difference is expected, or
            are similar when no difference is expected. False otherwise,
            including when no direct response is available to compare.
        """
        direct_response = test_data.get("direct_response")
        if direct_response is None:
            # Without the raw model output there is nothing to compare against;
            # report a failed check instead of raising KeyError.
            logger.error(
                f"Test {self.test_name} failed: no direct response available "
                "for the CodeGate enrichment check"
            )
            return False

        logger.debug(f"Response (CodeGate): {parsed_response}")
        logger.debug(f"Response (Raw model): {direct_response}")

        # Use the DistanceCheck to compare the two responses
        distance_check = DistanceCheck(self.test_name)
        are_similar = await distance_check.run_check(
            parsed_response, {DistanceCheck.KEY: direct_response}
        )

        # A missing or non-mapping configuration means "no difference expected",
        # which matches the default of the "expect_difference" setting.
        config = test_data.get(CodeGateEnrichment.KEY)
        if not isinstance(config, dict):
            config = {}
        expect_enrichment = config.get("expect_difference", False)

        # If CodeGate enriched the response, it should no longer be similar
        # to what the model returns on its own.
        if expect_enrichment:
            logger.info("CodeGate enrichment check: Expecting difference")
            return not are_similar

        # If the response is not enriched, both responses should be similar.
        logger.info("CodeGate enrichment check: Not expecting difference")
        return are_similar

tests/integration/integration_tests.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,26 @@
99
import requests
1010
import structlog
1111
import yaml
12-
from checks import CheckLoader
12+
from checks import CheckLoader, CodeGateEnrichment
1313
from dotenv import find_dotenv, load_dotenv
1414
from requesters import RequesterFactory
1515

1616
logger = structlog.get_logger("codegate")
1717

1818

19+
# Call the model provider directly, bypassing CodeGate.
20+
def call_directly(url: str, headers: dict, data: dict) -> Optional[requests.Response]:
    """POST ``data`` as JSON straight to the model provider, bypassing CodeGate.

    Args:
        url: Provider endpoint to call.
        headers: Request headers. The mapping is not modified; a copy with
            ``Content-Type: application/json`` added is sent.
        data: Request payload, sent as the JSON body. A truthy ``"stream"``
            entry makes the response streamed.

    Returns:
        The response on success, or None if the request could not be made or
        returned an HTTP error status (the error is logged).
    """
    try:
        # Build a copy so the caller's headers dict, which is reused for other
        # requests, is left untouched.
        request_headers = {**headers, "Content-Type": "application/json"}
        stream = data.get("stream", False)
        response = requests.post(url, headers=request_headers, json=data, stream=stream)
        response.raise_for_status()
        return response
    except Exception as e:
        # Any failure is reported to the caller as "no response".
        logger.error(f"Error making direct request to {url}: {str(e)}")
        return None
30+
31+
1932
class CodegateTestRunner:
2033
def __init__(self):
2134
self.requester_factory = RequesterFactory()
@@ -132,18 +145,27 @@ def replacement(match):
132145

133146
async def run_test(self, test: dict, test_headers: dict) -> bool:
134147
test_name = test["name"]
135-
url = test["url"]
136148
data = json.loads(test["data"])
137149
streaming = data.get("stream", False)
138150
provider = test["provider"]
139-
140151
logger.info(f"Starting test: {test_name}")
141152

142-
response = self.call_codegate(url, test_headers, data, provider)
153+
# Call Codegate
154+
response = self.call_codegate(test["url"], test_headers, data, provider)
143155
if not response:
144156
logger.error(f"Test {test_name} failed: No response received")
145157
return False
146158

159+
# Call model directly if specified
160+
direct_response = None
161+
if test.get(CodeGateEnrichment.KEY) is not None:
162+
direct_response = call_directly(
163+
test.get(CodeGateEnrichment.KEY)["provider_url"], test_headers, data
164+
)
165+
if not direct_response:
166+
logger.error(f"Test {test_name} failed: No direct response received")
167+
return False
168+
147169
# Debug response info
148170
logger.debug(f"Response status: {response.status_code}")
149171
logger.debug(f"Response headers: {dict(response.headers)}")
@@ -152,13 +174,24 @@ async def run_test(self, test: dict, test_headers: dict) -> bool:
152174
parsed_response = self.parse_response_message(response, streaming=streaming)
153175
logger.debug(f"Response message: {parsed_response}")
154176

177+
if direct_response:
178+
# HACK: stash the parsed direct response on the test dict so the checks can read it
179+
test["direct_response"] = self.parse_response_message(
180+
direct_response, streaming=streaming
181+
)
182+
logger.debug(f"Direct response message: {test['direct_response']}")
183+
155184
# Load appropriate checks for this test
156185
checks = CheckLoader.load(test)
157186

158187
# Run all checks
159188
all_passed = True
160189
for check in checks:
190+
logger.info(f"Running check: {check.__class__.__name__}")
161191
passed_check = await check.run_check(parsed_response, test)
192+
logger.info(
193+
f"Check {check.__class__.__name__} {'passed' if passed_check else 'failed'}"
194+
)
162195
if not passed_check:
163196
all_passed = False
164197

tests/integration/ollama/testcases.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ testcases:
3131
name: Ollama Chat
3232
provider: ollama
3333
url: http://127.0.0.1:8989/ollama/chat/completions
34+
codegate_enrichment:
35+
provider_url: http://127.0.0.1:11434/api/chat
36+
expect_difference: false
3437
data: |
3538
{
3639
"max_tokens":4096,
@@ -55,6 +58,9 @@ testcases:
5558
name: Ollama FIM
5659
provider: ollama
5760
url: http://127.0.0.1:8989/ollama/api/generate
61+
codegate_enrichment:
62+
provider_url: http://127.0.0.1:11434/api/generate
63+
expect_difference: false
5864
data: |
5965
{
6066
"stream": true,
@@ -88,6 +94,9 @@ testcases:
8894
name: Ollama Malicious Package
8995
provider: ollama
9096
url: http://127.0.0.1:8989/ollama/chat/completions
97+
codegate_enrichment:
98+
provider_url: http://127.0.0.1:11434/api/chat
99+
expect_difference: true
91100
data: |
92101
{
93102
"max_tokens":4096,
@@ -112,6 +121,9 @@ testcases:
112121
name: Ollama secret redacting chat
113122
provider: ollama
114123
url: http://127.0.0.1:8989/ollama/chat/completions
124+
codegate_enrichment:
125+
provider_url: http://127.0.0.1:11434/api/chat
126+
expect_difference: true
115127
data: |
116128
{
117129
"max_tokens":4096,

tests/integration/vllm/testcases.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ testcases:
3131
name: VLLM Chat
3232
provider: vllm
3333
url: http://127.0.0.1:8989/vllm/chat/completions
34+
codegate_enrichment:
35+
provider_url: http://127.0.0.1:8000/v1/chat/completions
36+
expect_difference: false
3437
data: |
3538
{
3639
"max_tokens":4096,
@@ -55,6 +58,10 @@ testcases:
5558
name: VLLM FIM
5659
provider: vllm
5760
url: http://127.0.0.1:8989/vllm/completions
61+
# Disabled for now: parsing the streamed response from the model fails (an issue on the vLLM side, not in CodeGate)
62+
# codegate_enrichment:
63+
# provider_url: http://127.0.0.1:8000/v1/completions
64+
# expect_difference: false
5865
data: |
5966
{
6067
"model": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
@@ -84,6 +91,9 @@ testcases:
8491
name: VLLM Malicious Package
8592
provider: vllm
8693
url: http://127.0.0.1:8989/vllm/chat/completions
94+
codegate_enrichment:
95+
provider_url: http://127.0.0.1:8000/v1/chat/completions
96+
expect_difference: true
8797
data: |
8898
{
8999
"max_tokens":4096,

0 commit comments

Comments
 (0)