Split the integration tests by providers #891

Merged (1 commit) on Feb 3, 2025
68 changes: 68 additions & 0 deletions tests/integration/anthropic/testcases.yaml
@@ -0,0 +1,68 @@
headers:
anthropic:
x-api-key: ENV_ANTHROPIC_KEY

testcases:
anthropic_chat:
name: Anthropic Chat
provider: anthropic
url: http://127.0.0.1:8989/anthropic/messages
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"claude-3-5-sonnet-20241022",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!

anthropic_fim:
name: Anthropic FIM
provider: anthropic
url: http://127.0.0.1:8989/anthropic/messages
data: |
{
"top_k": 50,
"temperature": 0,
"max_tokens": 4096,
"model": "claude-3-5-sonnet-20241022",
"stop_sequences": [
"</COMPLETION>",
"/src/",
"#- coding: utf-8",
"```"
],
"stream": true,
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n sum += i;\n }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION> total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# http://127.0.0.1:8989/vllm/completions\n# codegate/test.py\nimport requests\n\ndef call_api():\n {{FILL_HERE}}\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
}
]
}
],
"system": ""
}
likes: |
<COMPLETION>def call_api(url, method='get', data=None):
if method.lower() == 'get':
return requests.get(url)
elif method.lower() == 'post':
return requests.post(url, json=data)
else:
raise ValueError("Unsupported HTTP method")
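
Each provider now gets its own testcases.yaml carrying the provider-specific request headers, the request payload, and the expected-output check. As a minimal sketch of how such a file could be consumed, the illustrative loader below resolves ENV_-prefixed header values from the environment; the function name and the exact substitution rule are assumptions, not the project's actual runner code.

```python
# Illustrative loader only, not the project's runner. It assumes that an
# ENV_-prefixed header value is a placeholder for an environment variable
# named without the ENV_ prefix (e.g. ENV_ANTHROPIC_KEY -> ANTHROPIC_KEY).
import os

import yaml


def load_provider_testcases(path: str, provider: str):
    with open(path) as f:
        doc = yaml.safe_load(f)

    headers = {}
    for key, value in doc.get("headers", {}).get(provider, {}).items():
        if isinstance(value, str) and value.startswith("ENV_"):
            value = os.environ.get(value.removeprefix("ENV_"), "")
        headers[key] = value
    return headers, doc.get("testcases", {})


headers, cases = load_provider_testcases(
    "tests/integration/anthropic/testcases.yaml", "anthropic"
)
```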

44 changes: 44 additions & 0 deletions tests/integration/copilot/testcases.yaml
@@ -0,0 +1,44 @@
headers:
copilot:
Authorization: Bearer ENV_COPILOT_KEY
Content-Type: application/json

testcases:
copilot_chat:
name: Copilot Chat
provider: copilot
url: "https://api.openai.com/v1/chat/completions"
data: |
{
"messages":[
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"gpt-4o-mini",
"stream":true
}
likes: |
Hello from the integration tests!

copilot_malicious_package_question:
name: Copilot User asks about a malicious package
provider: copilot
url: "https://api.openai.com/v1/chat/completions"
data: |
{
"messages":[
{
"content":"Generate me example code using the python invokehttp package to call an API",
"role":"user"
}
],
"model":"gpt-4o-mini",
"stream":true
}
contains: |
https://www.insight.stacklok.com/report/pypi/invokehttp?utm_source=codegate
does_not_contain: |
import invokehttp
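
Besides likes, the copilot cases introduce two further assertion keys: contains, a string that must appear in the final response, and does_not_contain, a string that must be absent from it. The runner's comparison code is not part of this diff; the sketch below only illustrates how those two checks could be applied to the assembled response text, using a hypothetical check_response helper.

```python
# Hypothetical helper, not the runner's real implementation: apply the
# 'contains' / 'does_not_contain' assertions from a testcase to a response body.
def check_response(body: str, testcase: dict) -> bool:
    passed = True
    if "contains" in testcase:
        passed = passed and testcase["contains"].strip() in body
    if "does_not_contain" in testcase:
        passed = passed and testcase["does_not_contain"].strip() not in body
    return passed


testcase = {
    "contains": "https://www.insight.stacklok.com/report/pypi/invokehttp?utm_source=codegate",
    "does_not_contain": "import invokehttp",
}
response = (
    "This package is malicious, see "
    "https://www.insight.stacklok.com/report/pypi/invokehttp?utm_source=codegate"
)
print(check_response(response, testcase))  # True
```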

50 changes: 41 additions & 9 deletions tests/integration/integration_tests.py
@@ -243,21 +243,53 @@ async def main():
providers_env = os.environ.get("CODEGATE_PROVIDERS")
test_names_env = os.environ.get("CODEGATE_TEST_NAMES")

providers = None
if providers_env:
providers = [p.strip() for p in providers_env.split(",") if p.strip()]
# Base directory for all test cases
base_test_dir = "./tests/integration"

# Get list of provider directories
available_providers = []
try:
available_providers = [
d for d in os.listdir(base_test_dir) if os.path.isdir(os.path.join(base_test_dir, d))
]
except FileNotFoundError:
logger.error(f"Test directory {base_test_dir} not found")
sys.exit(1)

# Filter providers if specified in environment
selected_providers = None
if providers_env:
selected_providers = [p.strip() for p in providers_env.split(",") if p.strip()]
# Validate selected providers exist
invalid_providers = [p for p in selected_providers if p not in available_providers]
if invalid_providers:
logger.error(f"Invalid providers specified: {', '.join(invalid_providers)}")
logger.error(f"Available providers: {', '.join(available_providers)}")
sys.exit(1)
else:
selected_providers = available_providers

# Get test names if specified
test_names = None
if test_names_env:
test_names = [t.strip() for t in test_names_env.split(",") if t.strip()]

all_tests_passed = await test_runner.run_tests(
"./tests/integration/testcases.yaml", providers=providers, test_names=test_names
)
# Run tests for each provider
all_tests_passed = True
for provider in selected_providers:
provider_test_file = os.path.join(base_test_dir, provider, "testcases.yaml")

# Exit with status code 1 if any tests failed
if not all_tests_passed:
sys.exit(1)
if not os.path.exists(provider_test_file):
logger.warning(f"No testcases.yaml found for provider {provider}")
continue

logger.info(f"Running tests for provider: {provider}")
provider_tests_passed = await test_runner.run_tests(
provider_test_file,
providers=[provider], # Only run tests for current provider
test_names=test_names,
)
all_tests_passed = all_tests_passed and provider_tests_passed


if __name__ == "__main__":
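
The runner now discovers provider directories under tests/integration and loops over each one's testcases.yaml instead of reading a single shared file, while CODEGATE_PROVIDERS and CODEGATE_TEST_NAMES keep working as filters. The snippet below sketches a filtered run; the import path and the assumption that test names are the testcase keys are both illustrative.

```python
# Illustrative invocation only: restrict the run to two providers and two tests
# by setting the environment variables the runner reads before calling main().
# The import path is assumed; point it at wherever integration_tests.py lives.
import asyncio
import os

os.environ["CODEGATE_PROVIDERS"] = "anthropic,openai"
os.environ["CODEGATE_TEST_NAMES"] = "anthropic_chat,openai_chat"  # assumed to match testcase keys

from integration_tests import main

asyncio.run(main())
```
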
45 changes: 45 additions & 0 deletions tests/integration/llamacpp/testcases.yaml
@@ -0,0 +1,45 @@
headers:
llamacpp:
Content-Type: application/json

testcases:
llamacpp_chat:
name: LlamaCPP Chat
provider: llamacpp
url: http://127.0.0.1:8989/llamacpp/chat/completions
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"qwen2.5-coder-0.5b-instruct-q5_k_m",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!

llamacpp_fim:
name: LlamaCPP FIM
provider: llamacpp
url: http://127.0.0.1:8989/llamacpp/completions
data: |
{
"model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
"max_tokens": 4096,
"temperature": 0,
"stream": true,
"stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```", "def test"],
"prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
}
likes: |
print("Hello, World!")

64 changes: 64 additions & 0 deletions tests/integration/ollama/testcases.yaml
@@ -0,0 +1,64 @@
headers:
ollama:
Content-Type: application/json

testcases:
ollama_chat:
name: Ollama Chat
provider: ollama
url: http://127.0.0.1:8989/ollama/chat/completions
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"qwen2.5-coder:0.5b",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!

ollama_fim:
name: Ollama FIM
provider: ollama
url: http://127.0.0.1:8989/ollama/api/generate
data: |
{
"stream": true,
"model": "qwen2.5-coder:0.5b",
"raw": true,
"options": {
"temperature": 0.01,
"num_predict": 4096,
"stop": [
"<|endoftext|>",
"<|fim_prefix|>",
"<|fim_middle|>",
"<|fim_suffix|>",
"<|fim_pad|>",
"<|repo_name|>",
"<|file_sep|>",
"<|im_start|>",
"<|im_end|>",
"/src/",
"#- coding: utf-8",
"```"
],
"num_ctx": 8096
},
"prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
}
likes: |
```python
if __name__ == '__main__':
invokehttp.run(call_api)
```
55 changes: 55 additions & 0 deletions tests/integration/openai/testcases.yaml
@@ -0,0 +1,55 @@
headers:
openai:
Authorization: Bearer ENV_OPENAI_KEY

testcases:
openai_chat:
name: OpenAI Chat
provider: openai
url: http://127.0.0.1:8989/openai/chat/completions
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"gpt-4o-mini",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!

openai_fim:
name: OpenAI FIM
provider: openai
url: http://127.0.0.1:8989/openai/chat/completions
data: |
{
"messages": [
{
"role": "user",
"content": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n sum += i;\n }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION> total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# {\"messages\":[{\"role\":\"user\",\"content\":\"You are a HOLE FILLER. You are provided with a file containing holes, formatted as \\'{{HOLE_NAME}}\\'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. 
All completions MUST be truthful, accurate, well-written and correct.\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\nfunction sum_evens(lim) {\\\\n var sum = 0;\\\\n for (var i = 0; i < lim; ++i) {\\\\n {{FILL_HERE}}\\\\n }\\\\n return sum;\\\\n}\\\\n</QUERY>\\\\n\\\\nTASK: Fill the {{FILL_HERE}} hole.\\\\n\\\\n## CORRECT COMPLETION\\\\n\\\\n<COMPLETION>if (i % 2 === 0) {\\\\n sum += i;\\\\n }</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\ndef sum_list(lst):\\\\n total = 0\\\\n for x in lst:\\\\n {{FILL_HERE}}\\\\n return total\\\\n\\\\nprint sum_list([1, 2, 3])\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION> total += x</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\\\\n\\\\n// sum :: Tree Int -> Int\\\\n// sum (Node lft rgt) = sum lft + sum rgt\\\\n// sum (Leaf val) = val\\\\n\\\\n// convert to TypeScript:\\\\n{{FILL_HERE}}\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>type Tree<T>\\\\n = {$:\\\\\"Node\\\\\", lft: Tree<T>, rgt: Tree<T>}\\\\n | {$:\\\\\"Leaf\\\\\", val: T};\\\\n\\\\nfunction sum(tree: Tree<number>): number {\\\\n switch (tree.$) {\\\\n case \\\\\"Node\\\\\":\\\\n return sum(tree.lft) + sum(tree.rgt);\\\\n case \\\\\"Leaf\\\\\":\\\\n return tree.val;\\\\n }\\\\n}</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nThe 5th {{FILL_HERE}} is Jupiter.\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>planet from the Sun</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nfunction hypothenuse(a, b) {\\\\n return Math.sqrt({{FILL_HERE}}b ** 2);\\\\n}\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>a ** 2 + </COMPLETION>\\\\n\\\\n<QUERY>\\\\n\\\\n# codegate/test.py\\\\nimport invokehttp\\\\nimport requests\\\\n\\\\nkey = \\\\\"mysecret-key\\\\\"\\\\n\\\\ndef call_api():\\\\n {{FILL_HERE}}\\\\n\\\\n\\\\n\\\\n\\\\ndata = {\\'key1\\': \\'test1\\', \\'key2\\': \\'test2\\'}\\\\nresponse = call_api(\\'http://localhost:8080\\', method=\\'post\\', data=\\'data\\')\\\\n</QUERY>\\\\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\\\\n<COMPLETION>\"}],\"model\":\"gpt-4o-mini\",\"max_tokens\":4096,\"temperature\":0,\"stream\":true,\"stop\":[\"</COMPLETION>\",\"/src/\",\"#- coding: utf-8\",\"```\"]}\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n {{FILL_HERE}}\n\n\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
}
],
"model": "gpt-4o-mini",
"max_tokens": 4096,
"temperature": 0,
"stream": true,
"stop": [
"</COMPLETION>",
"/src/",
"#- coding: utf-8",
"```"
]
}
likes: |
<COMPLETION> response = requests.post('http://localhost:8080', json=data, headers={'Authorization': f'Bearer {key}'})
