Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 60834d7

Browse files
Add Kodu to the list of clients with supported code patterns
Extracting patterns is necessary to appropriately mux between providers depending on the client. This PR enables pattern extraction for Kodu.
1 parent 7a3e50f commit 60834d7

File tree

5 files changed

+177
-20
lines changed

5 files changed

+177
-20
lines changed

src/codegate/extract_snippets/body_extractor.py

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
ClineCodeSnippetExtractor,
77
CodeSnippetExtractor,
88
DefaultCodeSnippetExtractor,
9+
KoduCodeSnippetExtractor,
910
OpenInterpreterCodeSnippetExtractor,
1011
)
1112

@@ -39,6 +40,19 @@ def _extract_from_user_messages(self, data: dict) -> set[str]:
3940
filenames.extend(extracted_snippets.keys())
4041
return set(filenames)
4142

43+
def _extract_from_list_user_messages(self, data: dict) -> set[str]:
44+
filenames: List[str] = []
45+
for msg in data.get("messages", []):
46+
if msg.get("role", "") == "user":
47+
msgs_content = msg.get("content", [])
48+
for msg_content in msgs_content:
49+
if msg_content.get("type", "") == "text":
50+
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
51+
msg_content.get("text")
52+
)
53+
filenames.extend(extracted_snippets.keys())
54+
return set(filenames)
55+
4256
@abstractmethod
4357
def extract_unique_filenames(self, data: dict) -> set[str]:
4458
"""
@@ -70,27 +84,8 @@ class ClineBodySnippetExtractor(BodyCodeSnippetExtractor):
7084
def __init__(self):
7185
self._snippet_extractor = ClineCodeSnippetExtractor()
7286

73-
def _extract_from_user_messages(self, data: dict) -> set[str]:
74-
"""
75-
The method extracts the code snippets from the user messages in the data got from Cline.
76-
77-
It returns a set of filenames extracted from the code snippets.
78-
"""
79-
80-
filenames: List[str] = []
81-
for msg in data.get("messages", []):
82-
if msg.get("role", "") == "user":
83-
msgs_content = msg.get("content", [])
84-
for msg_content in msgs_content:
85-
if msg_content.get("type", "") == "text":
86-
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
87-
msg_content.get("text")
88-
)
89-
filenames.extend(extracted_snippets.keys())
90-
return set(filenames)
91-
9287
def extract_unique_filenames(self, data: dict) -> set[str]:
93-
return self._extract_from_user_messages(data)
88+
return self._extract_from_list_user_messages(data)
9489

9590

9691
class OpenInterpreterBodySnippetExtractor(BodyCodeSnippetExtractor):
@@ -136,3 +131,12 @@ def extract_unique_filenames(self, data: dict) -> set[str]:
136131
)
137132
filenames.extend(extracted_snippets.keys())
138133
return set(filenames)
134+
135+
136+
class KoduBodySnippetExtractor(BodyCodeSnippetExtractor):
137+
138+
def __init__(self):
139+
self._snippet_extractor = KoduCodeSnippetExtractor()
140+
141+
def extract_unique_filenames(self, data: dict) -> set[str]:
142+
return self._extract_from_list_user_messages(data)

src/codegate/extract_snippets/factory.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
BodyCodeSnippetExtractor,
55
ClineBodySnippetExtractor,
66
ContinueBodySnippetExtractor,
7+
KoduBodySnippetExtractor,
78
OpenInterpreterBodySnippetExtractor,
89
)
910
from codegate.extract_snippets.message_extractor import (
1011
AiderCodeSnippetExtractor,
1112
ClineCodeSnippetExtractor,
1213
CodeSnippetExtractor,
1314
DefaultCodeSnippetExtractor,
15+
KoduCodeSnippetExtractor,
1416
OpenInterpreterCodeSnippetExtractor,
1517
)
1618

@@ -24,6 +26,7 @@ def create_snippet_extractor(detected_client: ClientType) -> BodyCodeSnippetExtr
2426
ClientType.CLINE: ClineBodySnippetExtractor(),
2527
ClientType.AIDER: AiderBodySnippetExtractor(),
2628
ClientType.OPEN_INTERPRETER: OpenInterpreterBodySnippetExtractor(),
29+
ClientType.KODU: KoduBodySnippetExtractor(),
2730
}
2831
return mapping_client_extractor.get(detected_client, ContinueBodySnippetExtractor())
2932

@@ -37,5 +40,6 @@ def create_snippet_extractor(detected_client: ClientType) -> CodeSnippetExtracto
3740
ClientType.CLINE: ClineCodeSnippetExtractor(),
3841
ClientType.AIDER: AiderCodeSnippetExtractor(),
3942
ClientType.OPEN_INTERPRETER: OpenInterpreterCodeSnippetExtractor(),
43+
ClientType.KODU: KoduCodeSnippetExtractor(),
4044
}
4145
return mapping_client_extractor.get(detected_client, DefaultCodeSnippetExtractor())

src/codegate/extract_snippets/message_extractor.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@
6969
re.DOTALL,
7070
)
7171

72+
KODU_CONTENT_PATTERN = re.compile(
73+
r"<file\s+path=\"(?P<filename>[^\n>]+)\">" # Match the opening tag with path attribute
74+
r"(?P<content>.*?)" # Match the content (non-greedy)
75+
r"</file>", # Match the closing tag
76+
re.DOTALL,
77+
)
78+
7279

7380
class MatchedPatternSnippet(BaseModel):
7481
"""
@@ -343,3 +350,21 @@ def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
343350
filename = match.group("filename")
344351
content = match.group("content")
345352
return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)
353+
354+
355+
class KoduCodeSnippetExtractor(CodeSnippetExtractor):
356+
357+
@property
358+
def codeblock_pattern(self) -> re.Pattern:
359+
return [KODU_CONTENT_PATTERN]
360+
361+
@property
362+
def codeblock_with_filename_pattern(self) -> re.Pattern:
363+
return [KODU_CONTENT_PATTERN]
364+
365+
def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
366+
# We don't have language in the Kodu pattern
367+
matched_language = None
368+
filename = match.group("filename")
369+
content = match.group("content")
370+
return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)

tests/extract_snippets/test_body_extractor.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from codegate.extract_snippets.body_extractor import (
66
ClineBodySnippetExtractor,
77
ContinueBodySnippetExtractor,
8+
KoduBodySnippetExtractor,
89
OpenInterpreterBodySnippetExtractor,
910
)
1011

@@ -213,3 +214,72 @@ def test_body_extract_continue_snippets(test_case: BodyCodeSnippetTest):
213214
extractor = ContinueBodySnippetExtractor()
214215
filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
215216
_evaluate_actual_filenames(filenames, test_case)
217+
218+
219+
@pytest.mark.parametrize(
220+
"test_case",
221+
[
222+
# Analyze processed snippets from Kodu
223+
BodyCodeSnippetTest(
224+
input_body_dict={
225+
"messages": [
226+
{"role": "system", "content": "You are Cline, a highly skilled software"},
227+
{
228+
"role": "user",
229+
"content": [
230+
{
231+
"type": "text",
232+
"text": """
233+
Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
234+
<task>
235+
please analyze
236+
<additional-context>
237+
- Super critical information, the files attached here are part of the task and need to be
238+
- The URLs attached here need to be scrapped and the information should be used for the
239+
- The files passed in context are provided to help you understand the task better, the
240+
<files count="1"><file path="testing_file.py">import invokehttp
241+
import fastapi
242+
from fastapi import FastAPI, Request, Response, HTTPException
243+
import numpy
244+
245+
GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"
246+
247+
def add(a, b):
248+
return a + b
249+
250+
def multiply(a, b):
251+
return a * b
252+
253+
254+
255+
def substract(a, b):
256+
</file></files>
257+
<urls></urls>
258+
</additional-context>
259+
260+
</task>
261+
""",
262+
}
263+
],
264+
},
265+
{
266+
"type": "text",
267+
"text": """
268+
You must use a tool to proceed. Either use attempt_completion if you've completed the task,
269+
or ask_followup_question if you need more information. you must adhere to the tool format
270+
<kodu_action><tool_name><parameter1_name>value1</parameter1_name><parameter2_name>value2
271+
</parameter2_name>... additional parameters as needed in the same format
272+
...</tool_name></kodu_action>
273+
""",
274+
},
275+
]
276+
},
277+
expected_count=1,
278+
expected=["testing_file.py"],
279+
),
280+
],
281+
)
282+
def test_body_extract_kodu_snippets(test_case: BodyCodeSnippetTest):
283+
extractor = KoduBodySnippetExtractor()
284+
filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
285+
_evaluate_actual_filenames(filenames, test_case)

tests/extract_snippets/test_message_extractor.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
ClineCodeSnippetExtractor,
88
CodeSnippet,
99
DefaultCodeSnippetExtractor,
10+
KoduCodeSnippetExtractor,
1011
OpenInterpreterCodeSnippetExtractor,
1112
)
1213

@@ -714,6 +715,59 @@ def test_extract_openinterpreter_snippets(test_case: CodeSnippetTest):
714715
_evaluate_actual_snippets(snippets, test_case)
715716

716717

718+
@pytest.mark.parametrize(
719+
"test_case",
720+
[
721+
# Analyze processed snippets from Kodu
722+
CodeSnippetTest(
723+
input_message="""
724+
Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
725+
<task>
726+
please analyze
727+
<additional-context>
728+
- Super critical information, the files attached here are part of the task and need to be
729+
- The URLs attached here need to be scrapped and the information should be used for the
730+
- The files passed in context are provided to help you understand the task better, the
731+
<files count="1"><file path="testing_file.py">import invokehttp
732+
import fastapi
733+
from fastapi import FastAPI, Request, Response, HTTPException
734+
import numpy
735+
736+
GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"
737+
738+
def add(a, b):
739+
return a + b
740+
741+
def multiply(a, b):
742+
return a * b
743+
744+
745+
746+
def substract(a, b):
747+
</file></files>
748+
<urls></urls>
749+
</additional-context>
750+
751+
</task>
752+
""",
753+
expected_count=1,
754+
expected=[
755+
CodeSnippet(
756+
language="python",
757+
filepath="testing_file.py",
758+
code="def multiply(a, b):",
759+
file_extension=".py",
760+
),
761+
],
762+
),
763+
],
764+
)
765+
def test_extract_kodu_snippets(test_case: CodeSnippetTest):
766+
extractor = KoduCodeSnippetExtractor()
767+
snippets = extractor.extract_snippets(test_case.input_message, require_filepath=True)
768+
_evaluate_actual_snippets(snippets, test_case)
769+
770+
717771
@pytest.mark.parametrize(
718772
"filepath,expected",
719773
[

0 commit comments

Comments
 (0)