Skip to content

Commit 9e74583

Browse files
authored
Merge pull request #540 from stacklok/consider-ecosystem-in-bad-package-lookup
Consider ecosystem while looking up bad packages
2 parents 84d589a + 5b84312 commit 9e74583

File tree

3 files changed

+28
-11
lines changed

3 files changed

+28
-11
lines changed

src/codegate/pipeline/codegate_context_retriever/codegate.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,21 @@ async def process(
7373
# Extract any code snippets
7474
snippets = extract_snippets(user_messages)
7575

76-
# Collect all packages referenced in the snippets
77-
snippet_packages = []
78-
for snippet in snippets:
79-
snippet_packages.extend(
80-
PackageExtractor.extract_packages(snippet.code, snippet.language)
76+
bad_snippet_packages = []
77+
if len(snippets) > 0:
78+
# Collect all packages referenced in the snippets
79+
snippet_packages = []
80+
for snippet in snippets:
81+
snippet_packages.extend(
82+
PackageExtractor.extract_packages(snippet.code, snippet.language)
83+
)
84+
logger.info(f"Found {len(snippet_packages)} packages in code snippets.")
85+
86+
# Find bad packages in the snippets
87+
bad_snippet_packages = await storage_engine.search(
88+
language=snippets[0].language, packages=snippet_packages
8189
)
82-
logger.info(f"Found {len(snippet_packages)} packages in code snippets.")
83-
84-
# Find bad packages in the snippets
85-
bad_snippet_packages = await storage_engine.search_by_property("name", snippet_packages)
86-
logger.info(f"Found {len(bad_snippet_packages)} bad packages in code snippets.")
90+
logger.info(f"Found {len(bad_snippet_packages)} bad packages in code snippets.")
8791

8892
# Remove code snippets from the user messages and search for bad packages
8993
# in the rest of the user query/messages

src/codegate/pipeline/extract_snippets/output.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ async def _snippet_comment(self, snippet: CodeSnippet, context: PipelineContext)
5252

5353
# Check if any of the snippet libraries is a bad package
5454
storage_engine = StorageEngine()
55-
libobjects = await storage_engine.search_by_property("name", snippet.libraries)
55+
libobjects = await storage_engine.search(
56+
language=snippet.language, packages=snippet.libraries
57+
)
5658
logger.info(f"Found {len(libobjects)} libraries in the storage engine")
5759

5860
# If no bad packages are found, just return empty comment

src/codegate/storage/storage_engine.py

+11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@
1212

1313
logger = structlog.get_logger("codegate")
1414
VALID_ECOSYSTEMS = ["npm", "pypi", "crates", "maven", "go"]
15+
LANGUAGE_TO_ECOSYSTEM = {
16+
"javascript": "npm",
17+
"go": "go",
18+
"python": "pypi",
19+
"java": "maven",
20+
"rust": "crates",
21+
}
1522

1623

1724
class StorageEngine:
@@ -125,6 +132,7 @@ async def search_by_property(self, name: str, properties: List[str]) -> list[dic
125132
async def search(
126133
self,
127134
query: str = None,
135+
language: str = None,
128136
ecosystem: str = None,
129137
packages: List[str] = None,
130138
limit: int = 50,
@@ -136,6 +144,9 @@ async def search(
136144
try:
137145
cursor = self.conn.cursor()
138146

147+
if language and language in LANGUAGE_TO_ECOSYSTEM.keys():
148+
ecosystem = LANGUAGE_TO_ECOSYSTEM[language]
149+
139150
if packages and ecosystem and ecosystem in VALID_ECOSYSTEMS:
140151
placeholders = ",".join("?" * len(packages))
141152
query_sql = f"""

0 commit comments

Comments
 (0)