Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit b3cd064

Browse files
feat: remove duplicated alerts (#989)
* feat: remove duplicated alerts. Sometimes the different client tools generate multiple requests when the user requests a single task. This generates what looks like a duplicate alert, although it is not strictly a duplicate because each alert belongs to a different request. Since these alerts provide little value to the user, deduplicate them based on the code snippet and the details of the alert. Also remove the dogecoin regex, as it is giving false positives. Closes: #875 * Moved function to remove duplicate alerts to v1_processing * Restore dogecoin --------- Co-authored-by: Alejandro Ponce <[email protected]>
1 parent 5a07145 commit b3cd064

File tree

1 file changed

+40
-4
lines changed

1 file changed

+40
-4
lines changed

src/codegate/api/v1_processing.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ async def _is_system_prompt(message: str) -> bool:
6262
return False
6363

6464

65-
async def parse_request(request_str: str) -> Tuple[Optional[List[str]], str]:
65+
async def parse_request(request_str: str) -> Tuple[Optional[List[str]], str]: # noqa: C901
6666
"""
6767
Parse the request string from the pipeline and return the message and the model.
6868
"""
@@ -105,7 +105,7 @@ async def parse_request(request_str: str) -> Tuple[Optional[List[str]], str]:
105105
return messages, model
106106

107107

108-
async def parse_output(output_str: str) -> Optional[str]:
108+
async def parse_output(output_str: str) -> Optional[str]: # noqa: C901
109109
"""
110110
Parse the output string from the pipeline and return the message.
111111
"""
@@ -392,7 +392,8 @@ async def match_conversations(
392392
qa = _get_question_answer_from_partial(selected_partial_qa)
393393
qa.question.message = parse_question_answer(qa.question.message)
394394
questions_answers.append(qa)
395-
alerts.extend(selected_partial_qa.alerts)
395+
deduped_alerts = await remove_duplicate_alerts(selected_partial_qa.alerts)
396+
alerts.extend(deduped_alerts)
396397
token_usage_agg.add_model_token_usage(selected_partial_qa.model_token_usage)
397398

398399
# only add conversation if we have some answers
@@ -480,10 +481,11 @@ async def parse_get_alert_conversation(
480481
The rows contain the raw request and output strings from the pipeline.
481482
"""
482483
_, map_q_id_to_conversation = await parse_messages_in_conversations(prompts_outputs)
484+
dedup_alerts = await remove_duplicate_alerts(alerts)
483485
async with asyncio.TaskGroup() as tg:
484486
tasks = [
485487
tg.create_task(parse_row_alert_conversation(row, map_q_id_to_conversation))
486-
for row in alerts
488+
for row in dedup_alerts
487489
]
488490
return [task.result() for task in tasks if task.result() is not None]
489491

@@ -499,3 +501,37 @@ async def parse_workspace_token_usage(
499501
for p_qa in partial_question_answers:
500502
token_usage_agg.add_model_token_usage(p_qa.model_token_usage)
501503
return token_usage_agg
504+
505+
506+
async def remove_duplicate_alerts(
    alerts: List[v1_models.Alert], *, dedup_window_seconds: float = 5
) -> List[v1_models.Alert]:
    """
    Collapse near-simultaneous duplicate "codegate-secrets" alerts.

    Alerts are visited from newest to oldest. Alerts whose ``trigger_type`` is
    not "codegate-secrets" are always kept. A "codegate-secrets" alert is
    treated as a duplicate, and dropped, when an alert with the same
    (code_snippet, trigger_type, trigger_category, trigger-string prefix) was
    visited just before it and their timestamps are less than
    ``dedup_window_seconds`` apart. The comparison is always made against the
    most recently visited alert for that key, so a chain of alerts that are
    each close to the next one collapses into its newest member.

    Args:
        alerts: Alerts to de-duplicate. The input list is not modified.
        dedup_window_seconds: Maximum gap, in seconds, between two alerts with
            the same key for the older one to be considered a duplicate.

    Returns:
        The kept alerts, ordered by timestamp descending.
    """
    unique_alerts = []
    # Most recently visited "codegate-secrets" alert for each dedup key.
    last_seen = {}

    # Newest first, so the alert kept for a burst of duplicates is the newest.
    for alert in sorted(alerts, key=lambda x: x.timestamp, reverse=True):
        if alert.trigger_type != "codegate-secrets":
            unique_alerts.append(alert)
            continue

        # Only the text before the first "Context" takes part in the key.
        # NOTE(review): assumes trigger_string is a str here - confirm.
        trigger_string_content = alert.trigger_string.split("Context")[0]

        key = (
            alert.code_snippet,
            alert.trigger_type,
            alert.trigger_category,
            trigger_string_content,
        )

        previous = last_seen.get(key)
        # Always slide the reference point to the alert just visited (the
        # older one), so the next comparison is against its closest neighbour.
        last_seen[key] = alert

        if (
            previous is not None
            and abs((alert.timestamp - previous.timestamp).total_seconds())
            < dedup_window_seconds
        ):
            # Same finding reported again within the window: drop it.
            continue

        unique_alerts.append(alert)

    # Return the accumulated list, not the values of ``last_seen``: the latter
    # would drop every non-secrets alert and keep only one alert per key.
    return unique_alerts

0 commit comments

Comments
 (0)