diff --git a/src/codegate/extract_snippets/body_extractor.py b/src/codegate/extract_snippets/body_extractor.py
index adc4f3a9..be0c1884 100644
--- a/src/codegate/extract_snippets/body_extractor.py
+++ b/src/codegate/extract_snippets/body_extractor.py
@@ -6,6 +6,7 @@
ClineCodeSnippetExtractor,
CodeSnippetExtractor,
DefaultCodeSnippetExtractor,
+ KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)
@@ -39,6 +40,19 @@ def _extract_from_user_messages(self, data: dict) -> set[str]:
filenames.extend(extracted_snippets.keys())
return set(filenames)
+    def _extract_from_list_user_messages(self, data: dict) -> set[str]:
+        """
+        Extract filenames from user messages whose content is a list of typed parts.
+
+        Only parts with `"type": "text"` inside messages with `"role": "user"` are
+        scanned; their text is handed to the configured snippet extractor.
+
+        It returns a set of filenames extracted from the code snippets.
+        """
+        filenames: set[str] = set()
+        for msg in data.get("messages", []):
+            if msg.get("role", "") != "user":
+                continue
+            msgs_content = msg.get("content", [])
+            if not isinstance(msgs_content, list):
+                # Plain-string (or missing) content has no typed parts; iterating it
+                # would yield characters and fail on `.get`.
+                continue
+            for msg_content in msgs_content:
+                if not isinstance(msg_content, dict) or msg_content.get("type", "") != "text":
+                    continue
+                text = msg_content.get("text")
+                if not text:
+                    # Nothing to scan; avoid passing None to the extractor.
+                    continue
+                extracted_snippets = self._snippet_extractor.extract_unique_snippets(text)
+                filenames.update(extracted_snippets.keys())
+        return filenames
+
@abstractmethod
def extract_unique_filenames(self, data: dict) -> set[str]:
"""
@@ -70,27 +84,8 @@ class ClineBodySnippetExtractor(BodyCodeSnippetExtractor):
def __init__(self):
self._snippet_extractor = ClineCodeSnippetExtractor()
- def _extract_from_user_messages(self, data: dict) -> set[str]:
- """
- The method extracts the code snippets from the user messages in the data got from Cline.
-
- It returns a set of filenames extracted from the code snippets.
- """
-
- filenames: List[str] = []
- for msg in data.get("messages", []):
- if msg.get("role", "") == "user":
- msgs_content = msg.get("content", [])
- for msg_content in msgs_content:
- if msg_content.get("type", "") == "text":
- extracted_snippets = self._snippet_extractor.extract_unique_snippets(
- msg_content.get("text")
- )
- filenames.extend(extracted_snippets.keys())
- return set(filenames)
-
def extract_unique_filenames(self, data: dict) -> set[str]:
- return self._extract_from_user_messages(data)
+ return self._extract_from_list_user_messages(data)
class OpenInterpreterBodySnippetExtractor(BodyCodeSnippetExtractor):
@@ -136,3 +131,12 @@ def extract_unique_filenames(self, data: dict) -> set[str]:
)
filenames.extend(extracted_snippets.keys())
return set(filenames)
+
+
+class KoduBodySnippetExtractor(BodyCodeSnippetExtractor):
+    """Body-level filename extractor backed by `KoduCodeSnippetExtractor`."""
+
+    def __init__(self):
+        self._snippet_extractor = KoduCodeSnippetExtractor()
+
+    def extract_unique_filenames(self, data: dict) -> set[str]:
+        """Return the filenames found via `_extract_from_list_user_messages`."""
+        return self._extract_from_list_user_messages(data)
diff --git a/src/codegate/extract_snippets/factory.py b/src/codegate/extract_snippets/factory.py
index 5f5f0231..000f1809 100644
--- a/src/codegate/extract_snippets/factory.py
+++ b/src/codegate/extract_snippets/factory.py
@@ -4,6 +4,7 @@
BodyCodeSnippetExtractor,
ClineBodySnippetExtractor,
ContinueBodySnippetExtractor,
+ KoduBodySnippetExtractor,
OpenInterpreterBodySnippetExtractor,
)
from codegate.extract_snippets.message_extractor import (
@@ -11,6 +12,7 @@
ClineCodeSnippetExtractor,
CodeSnippetExtractor,
DefaultCodeSnippetExtractor,
+ KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)
@@ -24,6 +26,7 @@ def create_snippet_extractor(detected_client: ClientType) -> BodyCodeSnippetExtr
ClientType.CLINE: ClineBodySnippetExtractor(),
ClientType.AIDER: AiderBodySnippetExtractor(),
ClientType.OPEN_INTERPRETER: OpenInterpreterBodySnippetExtractor(),
+ ClientType.KODU: KoduBodySnippetExtractor(),
}
return mapping_client_extractor.get(detected_client, ContinueBodySnippetExtractor())
@@ -37,5 +40,6 @@ def create_snippet_extractor(detected_client: ClientType) -> CodeSnippetExtracto
ClientType.CLINE: ClineCodeSnippetExtractor(),
ClientType.AIDER: AiderCodeSnippetExtractor(),
ClientType.OPEN_INTERPRETER: OpenInterpreterCodeSnippetExtractor(),
+ ClientType.KODU: KoduCodeSnippetExtractor(),
}
return mapping_client_extractor.get(detected_client, DefaultCodeSnippetExtractor())
diff --git a/src/codegate/extract_snippets/message_extractor.py b/src/codegate/extract_snippets/message_extractor.py
index e9a7c968..bea5a2f2 100644
--- a/src/codegate/extract_snippets/message_extractor.py
+++ b/src/codegate/extract_snippets/message_extractor.py
@@ -69,6 +69,13 @@
re.DOTALL,
)
+# NOTE(review): the tag and group names below were reconstructed from the inline comments
+# and from the `filename`/`content` groups read by KoduCodeSnippetExtractor - confirm the
+# tag name against a real Kodu request.
+KODU_CONTENT_PATTERN = re.compile(
+    r"<file\s+path=\"(?P<filename>[^\n>]+)\">"  # Match the opening tag with path attribute
+    r"(?P<content>.*?)"  # Match the content (non-greedy)
+    r"</file>",  # Match the closing tag
+    re.DOTALL,
+)
+
class MatchedPatternSnippet(BaseModel):
"""
@@ -343,3 +350,21 @@ def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
filename = match.group("filename")
content = match.group("content")
return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)
+
+
+class KoduCodeSnippetExtractor(CodeSnippetExtractor):
+    """
+    Snippet extractor built on `KODU_CONTENT_PATTERN`.
+
+    Both pattern properties return the same single-element list.
+    """
+
+    @property
+    def codeblock_pattern(self) -> list[re.Pattern]:
+        return [KODU_CONTENT_PATTERN]
+
+    @property
+    def codeblock_with_filename_pattern(self) -> list[re.Pattern]:
+        return [KODU_CONTENT_PATTERN]
+
+    def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
+        """Build a snippet from the `filename` and `content` groups of `match`."""
+        # The Kodu pattern does not capture a language, so it is left unset.
+        filename = match.group("filename")
+        content = match.group("content")
+        return MatchedPatternSnippet(language=None, filename=filename, content=content)
diff --git a/tests/extract_snippets/test_body_extractor.py b/tests/extract_snippets/test_body_extractor.py
index bc5738d4..1aa48bc7 100644
--- a/tests/extract_snippets/test_body_extractor.py
+++ b/tests/extract_snippets/test_body_extractor.py
@@ -5,6 +5,7 @@
from codegate.extract_snippets.body_extractor import (
ClineBodySnippetExtractor,
ContinueBodySnippetExtractor,
+ KoduBodySnippetExtractor,
OpenInterpreterBodySnippetExtractor,
)
@@ -213,3 +214,72 @@ def test_body_extract_continue_snippets(test_case: BodyCodeSnippetTest):
extractor = ContinueBodySnippetExtractor()
filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
_evaluate_actual_filenames(filenames, test_case)
+
+
+@pytest.mark.parametrize(
+ "test_case",
+ [
+ # Analyze processed snippets from Kodu
+ BodyCodeSnippetTest(
+ input_body_dict={
+ "messages": [
+ {"role": "system", "content": "You are Kodu, an autonomous coding agent."},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": """
+Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
+
+please analyze
+
+ - Super critical information, the files attached here are part of the task and need to be
+ - The URLs attached here need to be scrapped and the information should be used for the
+ - The files passed in context are provided to help you understand the task better, the
+ import invokehttp
+import fastapi
+from fastapi import FastAPI, Request, Response, HTTPException
+import numpy
+
+GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"
+
+def add(a, b):
+ return a + b
+
+def multiply(a, b):
+ return a * b
+
+
+
+def substract(a, b):
+
+
+
+
+
+ """,
+ }
+ ],
+ },
+ {
+ "type": "text",
+ "text": """
+You must use a tool to proceed. Either use attempt_completion if you've completed the task,
+or ask_followup_question if you need more information. you must adhere to the tool format
+value1value2
+... additional parameters as needed in the same format
+...
+""",
+ },
+ ]
+ },
+ expected_count=1,
+ expected=["testing_file.py"],
+ ),
+ ],
+)
+def test_body_extract_kodu_snippets(test_case: BodyCodeSnippetTest):
+    """Check the filenames KoduBodySnippetExtractor pulls from a request body."""
+    actual_filenames = KoduBodySnippetExtractor().extract_unique_filenames(
+        test_case.input_body_dict
+    )
+    _evaluate_actual_filenames(actual_filenames, test_case)
diff --git a/tests/extract_snippets/test_message_extractor.py b/tests/extract_snippets/test_message_extractor.py
index 07e4d8b3..db21896b 100644
--- a/tests/extract_snippets/test_message_extractor.py
+++ b/tests/extract_snippets/test_message_extractor.py
@@ -7,6 +7,7 @@
ClineCodeSnippetExtractor,
CodeSnippet,
DefaultCodeSnippetExtractor,
+ KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)
@@ -714,6 +715,59 @@ def test_extract_openinterpreter_snippets(test_case: CodeSnippetTest):
_evaluate_actual_snippets(snippets, test_case)
+@pytest.mark.parametrize(
+ "test_case",
+ [
+ # Analyze processed snippets from Kodu
+ CodeSnippetTest(
+ input_message="""
+Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
+
+please analyze
+
+ - Super critical information, the files attached here are part of the task and need to be
+ - The URLs attached here need to be scrapped and the information should be used for the
+ - The files passed in context are provided to help you understand the task better, the
+ import invokehttp
+import fastapi
+from fastapi import FastAPI, Request, Response, HTTPException
+import numpy
+
+GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"
+
+def add(a, b):
+ return a + b
+
+def multiply(a, b):
+ return a * b
+
+
+
+def substract(a, b):
+
+
+
+
+
+ """,
+ expected_count=1,
+ expected=[
+ CodeSnippet(
+ language="python",
+ filepath="testing_file.py",
+ code="def multiply(a, b):",
+ file_extension=".py",
+ ),
+ ],
+ ),
+ ],
+)
+def test_extract_kodu_snippets(test_case: CodeSnippetTest):
+    """Check the snippets KoduCodeSnippetExtractor extracts when a filepath is required."""
+    actual_snippets = KoduCodeSnippetExtractor().extract_snippets(
+        test_case.input_message, require_filepath=True
+    )
+    _evaluate_actual_snippets(actual_snippets, test_case)
+
+
@pytest.mark.parametrize(
"filepath,expected",
[