Skip to content

Commit c542d83

Browse files
Add Kodu to the list of clients with supported code patterns (#1003)
* Add Kodu to the list of clients with supported code patterns. Extracting patterns is necessary to mux appropriately between providers depending on the client. This PR enables Kodu. * Fix copy-paste errors
1 parent 7a3e50f commit c542d83

File tree

5 files changed

+177
-20
lines changed

5 files changed

+177
-20
lines changed

src/codegate/extract_snippets/body_extractor.py

+24-20
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
ClineCodeSnippetExtractor,
77
CodeSnippetExtractor,
88
DefaultCodeSnippetExtractor,
9+
KoduCodeSnippetExtractor,
910
OpenInterpreterCodeSnippetExtractor,
1011
)
1112

@@ -39,6 +40,19 @@ def _extract_from_user_messages(self, data: dict) -> set[str]:
3940
filenames.extend(extracted_snippets.keys())
4041
return set(filenames)
4142

43+
def _extract_from_list_user_messages(self, data: dict) -> set[str]:
44+
filenames: List[str] = []
45+
for msg in data.get("messages", []):
46+
if msg.get("role", "") == "user":
47+
msgs_content = msg.get("content", [])
48+
for msg_content in msgs_content:
49+
if msg_content.get("type", "") == "text":
50+
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
51+
msg_content.get("text")
52+
)
53+
filenames.extend(extracted_snippets.keys())
54+
return set(filenames)
55+
4256
@abstractmethod
4357
def extract_unique_filenames(self, data: dict) -> set[str]:
4458
"""
@@ -70,27 +84,8 @@ class ClineBodySnippetExtractor(BodyCodeSnippetExtractor):
7084
def __init__(self):
7185
self._snippet_extractor = ClineCodeSnippetExtractor()
7286

73-
def _extract_from_user_messages(self, data: dict) -> set[str]:
74-
"""
75-
The method extracts the code snippets from the user messages in the data got from Cline.
76-
77-
It returns a set of filenames extracted from the code snippets.
78-
"""
79-
80-
filenames: List[str] = []
81-
for msg in data.get("messages", []):
82-
if msg.get("role", "") == "user":
83-
msgs_content = msg.get("content", [])
84-
for msg_content in msgs_content:
85-
if msg_content.get("type", "") == "text":
86-
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
87-
msg_content.get("text")
88-
)
89-
filenames.extend(extracted_snippets.keys())
90-
return set(filenames)
91-
9287
def extract_unique_filenames(self, data: dict) -> set[str]:
93-
return self._extract_from_user_messages(data)
88+
return self._extract_from_list_user_messages(data)
9489

9590

9691
class OpenInterpreterBodySnippetExtractor(BodyCodeSnippetExtractor):
@@ -136,3 +131,12 @@ def extract_unique_filenames(self, data: dict) -> set[str]:
136131
)
137132
filenames.extend(extracted_snippets.keys())
138133
return set(filenames)
134+
135+
136+
class KoduBodySnippetExtractor(BodyCodeSnippetExtractor):
137+
138+
def __init__(self):
139+
self._snippet_extractor = KoduCodeSnippetExtractor()
140+
141+
def extract_unique_filenames(self, data: dict) -> set[str]:
142+
return self._extract_from_list_user_messages(data)

src/codegate/extract_snippets/factory.py

+4
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
BodyCodeSnippetExtractor,
55
ClineBodySnippetExtractor,
66
ContinueBodySnippetExtractor,
7+
KoduBodySnippetExtractor,
78
OpenInterpreterBodySnippetExtractor,
89
)
910
from codegate.extract_snippets.message_extractor import (
1011
AiderCodeSnippetExtractor,
1112
ClineCodeSnippetExtractor,
1213
CodeSnippetExtractor,
1314
DefaultCodeSnippetExtractor,
15+
KoduCodeSnippetExtractor,
1416
OpenInterpreterCodeSnippetExtractor,
1517
)
1618

@@ -24,6 +26,7 @@ def create_snippet_extractor(detected_client: ClientType) -> BodyCodeSnippetExtr
2426
ClientType.CLINE: ClineBodySnippetExtractor(),
2527
ClientType.AIDER: AiderBodySnippetExtractor(),
2628
ClientType.OPEN_INTERPRETER: OpenInterpreterBodySnippetExtractor(),
29+
ClientType.KODU: KoduBodySnippetExtractor(),
2730
}
2831
return mapping_client_extractor.get(detected_client, ContinueBodySnippetExtractor())
2932

@@ -37,5 +40,6 @@ def create_snippet_extractor(detected_client: ClientType) -> CodeSnippetExtracto
3740
ClientType.CLINE: ClineCodeSnippetExtractor(),
3841
ClientType.AIDER: AiderCodeSnippetExtractor(),
3942
ClientType.OPEN_INTERPRETER: OpenInterpreterCodeSnippetExtractor(),
43+
ClientType.KODU: KoduCodeSnippetExtractor(),
4044
}
4145
return mapping_client_extractor.get(detected_client, DefaultCodeSnippetExtractor())

src/codegate/extract_snippets/message_extractor.py

+25
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@
6969
re.DOTALL,
7070
)
7171

72+
KODU_CONTENT_PATTERN = re.compile(
73+
r"<file\s+path=\"(?P<filename>[^\n>]+)\">" # Match the opening tag with path attribute
74+
r"(?P<content>.*?)" # Match the content (non-greedy)
75+
r"</file>", # Match the closing tag
76+
re.DOTALL,
77+
)
78+
7279

7380
class MatchedPatternSnippet(BaseModel):
7481
"""
@@ -343,3 +350,21 @@ def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
343350
filename = match.group("filename")
344351
content = match.group("content")
345352
return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)
353+
354+
355+
class KoduCodeSnippetExtractor(CodeSnippetExtractor):
356+
357+
@property
358+
def codeblock_pattern(self) -> re.Pattern:
359+
return [KODU_CONTENT_PATTERN]
360+
361+
@property
362+
def codeblock_with_filename_pattern(self) -> re.Pattern:
363+
return [KODU_CONTENT_PATTERN]
364+
365+
def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
366+
# We don't have language in the Kodu pattern
367+
matched_language = None
368+
filename = match.group("filename")
369+
content = match.group("content")
370+
return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)

tests/extract_snippets/test_body_extractor.py

+70
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from codegate.extract_snippets.body_extractor import (
66
ClineBodySnippetExtractor,
77
ContinueBodySnippetExtractor,
8+
KoduBodySnippetExtractor,
89
OpenInterpreterBodySnippetExtractor,
910
)
1011

@@ -213,3 +214,72 @@ def test_body_extract_continue_snippets(test_case: BodyCodeSnippetTest):
213214
extractor = ContinueBodySnippetExtractor()
214215
filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
215216
_evaluate_actual_filenames(filenames, test_case)
217+
218+
219+
@pytest.mark.parametrize(
220+
"test_case",
221+
[
222+
# Analyze processed snippets from Kodu
223+
BodyCodeSnippetTest(
224+
input_body_dict={
225+
"messages": [
226+
{"role": "system", "content": "You are Kodu, an autonomous coding agent."},
227+
{
228+
"role": "user",
229+
"content": [
230+
{
231+
"type": "text",
232+
"text": """
233+
Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
234+
<task>
235+
please analyze
236+
<additional-context>
237+
- Super critical information, the files attached here are part of the task and need to be
238+
- The URLs attached here need to be scrapped and the information should be used for the
239+
- The files passed in context are provided to help you understand the task better, the
240+
<files count="1"><file path="testing_file.py">import invokehttp
241+
import fastapi
242+
from fastapi import FastAPI, Request, Response, HTTPException
243+
import numpy
244+
245+
GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"
246+
247+
def add(a, b):
248+
return a + b
249+
250+
def multiply(a, b):
251+
return a * b
252+
253+
254+
255+
def substract(a, b):
256+
</file></files>
257+
<urls></urls>
258+
</additional-context>
259+
260+
</task>
261+
""",
262+
}
263+
],
264+
},
265+
{
266+
"type": "text",
267+
"text": """
268+
You must use a tool to proceed. Either use attempt_completion if you've completed the task,
269+
or ask_followup_question if you need more information. you must adhere to the tool format
270+
<kodu_action><tool_name><parameter1_name>value1</parameter1_name><parameter2_name>value2
271+
</parameter2_name>... additional parameters as needed in the same format
272+
...</tool_name></kodu_action>
273+
""",
274+
},
275+
]
276+
},
277+
expected_count=1,
278+
expected=["testing_file.py"],
279+
),
280+
],
281+
)
282+
def test_body_extract_kodu_snippets(test_case: BodyCodeSnippetTest):
283+
extractor = KoduBodySnippetExtractor()
284+
filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
285+
_evaluate_actual_filenames(filenames, test_case)

tests/extract_snippets/test_message_extractor.py

+54
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
ClineCodeSnippetExtractor,
88
CodeSnippet,
99
DefaultCodeSnippetExtractor,
10+
KoduCodeSnippetExtractor,
1011
OpenInterpreterCodeSnippetExtractor,
1112
)
1213

@@ -714,6 +715,59 @@ def test_extract_openinterpreter_snippets(test_case: CodeSnippetTest):
714715
_evaluate_actual_snippets(snippets, test_case)
715716

716717

718+
@pytest.mark.parametrize(
719+
"test_case",
720+
[
721+
# Analyze processed snippets from Kodu
722+
CodeSnippetTest(
723+
input_message="""
724+
Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
725+
<task>
726+
please analyze
727+
<additional-context>
728+
- Super critical information, the files attached here are part of the task and need to be
729+
- The URLs attached here need to be scrapped and the information should be used for the
730+
- The files passed in context are provided to help you understand the task better, the
731+
<files count="1"><file path="testing_file.py">import invokehttp
732+
import fastapi
733+
from fastapi import FastAPI, Request, Response, HTTPException
734+
import numpy
735+
736+
GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"
737+
738+
def add(a, b):
739+
return a + b
740+
741+
def multiply(a, b):
742+
return a * b
743+
744+
745+
746+
def substract(a, b):
747+
</file></files>
748+
<urls></urls>
749+
</additional-context>
750+
751+
</task>
752+
""",
753+
expected_count=1,
754+
expected=[
755+
CodeSnippet(
756+
language="python",
757+
filepath="testing_file.py",
758+
code="def multiply(a, b):",
759+
file_extension=".py",
760+
),
761+
],
762+
),
763+
],
764+
)
765+
def test_extract_kodu_snippets(test_case: CodeSnippetTest):
766+
extractor = KoduCodeSnippetExtractor()
767+
snippets = extractor.extract_snippets(test_case.input_message, require_filepath=True)
768+
_evaluate_actual_snippets(snippets, test_case)
769+
770+
717771
@pytest.mark.parametrize(
718772
"filepath,expected",
719773
[

0 commit comments

Comments
 (0)