
Commit ec96a31

Use the client type when instantiating and running provider pipelines
Instead of detecting the client type again while the pipeline is being processed, pass the client type as a constant when the pipeline instance is created, and replace the hardcoded client strings with the ClientType constants. Related: #830
Parent: 29f143b · Commit: ec96a31
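
The effect of the change, in caller terms: a provider already knows which client it is serving, so it hands that ClientType constant to the pipeline factory once, and every pipeline step can read it from the shared context. The snippet below is a minimal sketch of that flow rather than code from this commit; it assumes SecretsManager can be constructed without arguments and uses ClientType.CLINE purely as an example.

    from codegate.clients.clients import ClientType
    from codegate.pipeline.factory import PipelineFactory
    from codegate.pipeline.secrets.manager import SecretsManager

    # Assumption: SecretsManager() takes no constructor arguments here.
    factory = PipelineFactory(SecretsManager())

    # The client type is passed in once, as a constant, when the pipeline is built...
    chat_pipeline = factory.create_input_pipeline(ClientType.CLINE)
    fim_pipeline = factory.create_fim_pipeline(ClientType.CLINE)

    # ...and ends up on the shared PipelineContext, so steps no longer need to
    # call get_tool_name_from_messages() on every request.
    assert chat_pipeline.instance.context.client == ClientType.CLINE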

File tree

14 files changed: +94 -35 lines

src/codegate/pipeline/base.py

Lines changed: 24 additions & 9 deletions
@@ -11,9 +11,9 @@
 from litellm import ChatCompletionRequest, ModelResponse
 from pydantic import BaseModel
 
+from codegate.clients.clients import ClientType
 from codegate.db.models import Alert, Output, Prompt
 from codegate.pipeline.secrets.manager import SecretsManager
-from codegate.utils.utils import get_tool_name_from_messages
 
 logger = structlog.get_logger("codegate")
 
@@ -81,6 +81,7 @@ class PipelineContext:
     shortcut_response: bool = False
     bad_packages_found: bool = False
     secrets_found: bool = False
+    client: ClientType = ClientType.GENERIC
 
     def add_code_snippet(self, snippet: CodeSnippet):
         self.code_snippets.append(snippet)
@@ -241,12 +242,14 @@ def get_last_user_message(
     @staticmethod
     def get_last_user_message_block(
         request: ChatCompletionRequest,
+        client: ClientType = ClientType.GENERIC,
     ) -> Optional[tuple[str, int]]:
         """
         Get the last block of consecutive 'user' messages from the request.
 
         Args:
             request (ChatCompletionRequest): The chat completion request to process
+            client (ClientType): The client type to consider when processing the request
 
         Returns:
             Optional[str, int]: A string containing all consecutive user messages in the
@@ -261,9 +264,8 @@ def get_last_user_message_block(
         messages = request["messages"]
         block_start_index = None
 
-        base_tool = get_tool_name_from_messages(request)
         accepted_roles = ["user", "assistant"]
-        if base_tool == "open interpreter":
+        if client == ClientType.OPEN_INTERPRETER:
            # open interpreter also uses the role "tool"
            accepted_roles.append("tool")
 
@@ -328,12 +330,16 @@ async def process(
 
 class InputPipelineInstance:
     def __init__(
-        self, pipeline_steps: List[PipelineStep], secret_manager: SecretsManager, is_fim: bool
+        self,
+        pipeline_steps: List[PipelineStep],
+        secret_manager: SecretsManager,
+        is_fim: bool,
+        client: ClientType = ClientType.GENERIC,
     ):
         self.pipeline_steps = pipeline_steps
         self.secret_manager = secret_manager
         self.is_fim = is_fim
-        self.context = PipelineContext()
+        self.context = PipelineContext(client=client)
 
         # we create the sesitive context here so that it is not shared between individual requests
         # TODO: could we get away with just generating the session ID for an instance?
@@ -392,16 +398,25 @@ async def process_request(
 
 class SequentialPipelineProcessor:
     def __init__(
-        self, pipeline_steps: List[PipelineStep], secret_manager: SecretsManager, is_fim: bool
+        self,
+        pipeline_steps: List[PipelineStep],
+        secret_manager: SecretsManager,
+        client_type: ClientType,
+        is_fim: bool,
     ):
         self.pipeline_steps = pipeline_steps
         self.secret_manager = secret_manager
         self.is_fim = is_fim
-        self.instance = self._create_instance()
+        self.instance = self._create_instance(client_type)
 
-    def _create_instance(self) -> InputPipelineInstance:
+    def _create_instance(self, client_type: ClientType) -> InputPipelineInstance:
         """Create a new pipeline instance for processing a request"""
-        return InputPipelineInstance(self.pipeline_steps, self.secret_manager, self.is_fim)
+        return InputPipelineInstance(
+            self.pipeline_steps,
+            self.secret_manager,
+            self.is_fim,
+            client_type,
+        )
 
     async def process_request(
         self,
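
To illustrate the new API surface in base.py, here is a minimal sketch, not code from the repository. It assumes PipelineContext's remaining fields all have defaults, that get_last_user_message_block lives on the PipelineStep base class (the subclasses in this commit call it via self), and that a plain dict is acceptable where ChatCompletionRequest is expected.

    from codegate.clients.clients import ClientType
    from codegate.pipeline.base import PipelineContext, PipelineStep

    # The context defaults to the generic client unless one is passed in.
    ctx = PipelineContext()
    assert ctx.client == ClientType.GENERIC

    request = {
        "messages": [
            {"role": "user", "content": "please list the project files"},
            {"role": "tool", "content": "file_a.py file_b.py"},
        ]
    }

    # With OPEN_INTERPRETER the "tool" role is also accepted when collecting the
    # last block of user messages (see the accepted_roles change above).
    block = PipelineStep.get_last_user_message_block(
        request, ClientType.OPEN_INTERPRETER
    )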

src/codegate/pipeline/cli/cli.py

Lines changed: 4 additions & 5 deletions
@@ -4,14 +4,14 @@
 
 from litellm import ChatCompletionRequest
 
+from codegate.clients.clients import ClientType
 from codegate.pipeline.base import (
     PipelineContext,
     PipelineResponse,
     PipelineResult,
     PipelineStep,
 )
 from codegate.pipeline.cli.commands import CustomInstructions, Version, Workspace
-from codegate.utils.utils import get_tool_name_from_messages
 
 HELP_TEXT = """
 ## CodeGate CLI\n
@@ -110,12 +110,11 @@ async def process(
         if last_user_message is not None:
             last_user_message_str, _ = last_user_message
             last_user_message_str = last_user_message_str.strip()
-            base_tool = get_tool_name_from_messages(request)
             codegate_regex = re.compile(r"^codegate(?:\s+(.*))?", re.IGNORECASE)
 
-            if base_tool and base_tool in ["cline", "kodu"]:
+            if context.client in [ClientType.CLINE, ClientType.KODU]:
                 match = _get_cli_from_cline(codegate_regex, last_user_message_str)
-            elif base_tool == "open interpreter":
+            elif context.client in [ClientType.OPEN_INTERPRETER]:
                 match = _get_cli_from_open_interpreter(last_user_message_str)
             else:
                 # Check if "codegate" is the first word in the message
@@ -130,7 +129,7 @@ async def process(
             if args:
                 context.shortcut_response = True
                 cmd_out = await codegate_cli(args[1:])
-                if base_tool in ["cline", "kodu"]:
+                if context.client in [ClientType.CLINE, ClientType.KODU]:
                     cmd_out = (
                         f"<attempt_completion><result>{cmd_out}</result></attempt_completion>\n"
                     )
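
Steps that previously re-derived the tool name from the message contents can now branch on context.client directly. The following is a hypothetical step, not part of this commit, assuming PipelineStep only requires the async process method with the signature seen in the diffs:

    from litellm import ChatCompletionRequest

    from codegate.clients.clients import ClientType
    from codegate.pipeline.base import PipelineContext, PipelineResult, PipelineStep


    class ClientAwareStep(PipelineStep):
        """Hypothetical step that adapts its behaviour to the detected client."""

        async def process(
            self, request: ChatCompletionRequest, context: PipelineContext
        ) -> PipelineResult:
            if context.client in (ClientType.CLINE, ClientType.KODU):
                # Cline and Kodu expect command output wrapped in attempt_completion tags.
                pass
            elif context.client == ClientType.OPEN_INTERPRETER:
                # Open Interpreter also sends messages with the "tool" role.
                pass
            return PipelineResult(request=request, context=context)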

src/codegate/pipeline/codegate_context_retriever/codegate.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ async def process(  # noqa: C901
         Use RAG DB to add context to the user request
         """
         # Get the latest user message
-        last_message = self.get_last_user_message_block(request)
+        last_message = self.get_last_user_message_block(request, context.client)
         if not last_message:
             return PipelineResult(request=request)
         user_message, last_user_idx = last_message

src/codegate/pipeline/extract_snippets/extract_snippets.py

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ async def process(
         request: ChatCompletionRequest,
         context: PipelineContext,
     ) -> PipelineResult:
-        last_message = self.get_last_user_message_block(request)
+        last_message = self.get_last_user_message_block(request, context.client)
         if not last_message:
             return PipelineResult(request=request, context=context)
         msg_content, _ = last_message

src/codegate/pipeline/factory.py

Lines changed: 15 additions & 4 deletions
@@ -1,5 +1,6 @@
 from typing import List
 
+from codegate.clients.clients import ClientType
 from codegate.config import Config
 from codegate.pipeline.base import PipelineStep, SequentialPipelineProcessor
 from codegate.pipeline.cli.cli import CodegateCli
@@ -20,7 +21,7 @@ class PipelineFactory:
     def __init__(self, secrets_manager: SecretsManager):
         self.secrets_manager = secrets_manager
 
-    def create_input_pipeline(self) -> SequentialPipelineProcessor:
+    def create_input_pipeline(self, client_type: ClientType) -> SequentialPipelineProcessor:
         input_steps: List[PipelineStep] = [
             # make sure that this step is always first in the pipeline
             # the other steps might send the request to a LLM for it to be analyzed
@@ -32,13 +33,23 @@ def create_input_pipeline(self) -> SequentialPipelineProcessor:
             CodegateContextRetriever(),
             SystemPrompt(Config.get_config().prompts.default_chat),
         ]
-        return SequentialPipelineProcessor(input_steps, self.secrets_manager, is_fim=False)
+        return SequentialPipelineProcessor(
+            input_steps,
+            self.secrets_manager,
+            client_type,
+            is_fim=False,
+        )
 
-    def create_fim_pipeline(self) -> SequentialPipelineProcessor:
+    def create_fim_pipeline(self, client_type: ClientType) -> SequentialPipelineProcessor:
         fim_steps: List[PipelineStep] = [
             CodegateSecrets(),
         ]
-        return SequentialPipelineProcessor(fim_steps, self.secrets_manager, is_fim=True)
+        return SequentialPipelineProcessor(
+            fim_steps,
+            self.secrets_manager,
+            client_type,
+            is_fim=True,
+        )
 
     def create_output_pipeline(self) -> OutputPipelineProcessor:
         output_steps: List[OutputPipelineStep] = [
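
Callers must now supply the client type to both factory methods; the Copilot pipelines later in this commit pass ClientType.COPILOT explicitly. When constructing SequentialPipelineProcessor directly, client_type is a positional argument that comes before the is_fim keyword. A hedged sketch follows (the import path for CodegateSecrets is inferred from the file layout, and SecretsManager is assumed to need no constructor arguments):

    from codegate.clients.clients import ClientType
    from codegate.pipeline.base import SequentialPipelineProcessor
    from codegate.pipeline.secrets.manager import SecretsManager
    from codegate.pipeline.secrets.secrets import CodegateSecrets

    # Equivalent of create_fim_pipeline(ClientType.COPILOT), spelled out by hand.
    processor = SequentialPipelineProcessor(
        [CodegateSecrets()],
        SecretsManager(),
        ClientType.COPILOT,
        is_fim=True,
    )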

src/codegate/pipeline/secrets/secrets.py

Lines changed: 1 addition & 1 deletion
@@ -272,7 +272,7 @@ async def process(
         total_matches = []
 
         # get last user message block to get index for the first relevant user message
-        last_user_message = self.get_last_user_message_block(new_request)
+        last_user_message = self.get_last_user_message_block(new_request, context.client)
         last_assistant_idx = -1
         if last_user_message:
             _, user_idx = last_user_message

src/codegate/providers/anthropic/provider.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ async def process_request(
     ):
         is_fim_request = self._is_fim_request(request_url_path, data)
         try:
-            stream = await self.complete(data, api_key, is_fim_request)
+            stream = await self.complete(data, api_key, is_fim_request, client_type)
         except Exception as e:
            # check if we have an status code there
            if hasattr(e, "status_code"):

src/codegate/providers/base.py

Lines changed: 12 additions & 3 deletions
@@ -137,15 +137,16 @@ async def _run_input_pipeline(
         normalized_request: ChatCompletionRequest,
         api_key: Optional[str],
         api_base: Optional[str],
+        client_type: ClientType,
         is_fim_request: bool,
     ) -> PipelineResult:
         # Decide which pipeline processor to use
         if is_fim_request:
-            pipeline_processor = self._pipeline_factory.create_fim_pipeline()
+            pipeline_processor = self._pipeline_factory.create_fim_pipeline(client_type)
             logger.info("FIM pipeline selected for execution.")
             normalized_request = self._fim_normalizer.normalize(normalized_request)
         else:
-            pipeline_processor = self._pipeline_factory.create_input_pipeline()
+            pipeline_processor = self._pipeline_factory.create_input_pipeline(client_type)
             logger.info("Chat completion pipeline selected for execution.")
         if pipeline_processor is None:
             return PipelineResult(request=normalized_request)
@@ -253,7 +254,11 @@ def _dump_request_response(self, prefix: str, data: Any) -> None:
             f.write(str(data))
 
     async def complete(
-        self, data: Dict, api_key: Optional[str], is_fim_request: bool
+        self,
+        data: Dict,
+        api_key: Optional[str],
+        is_fim_request: bool,
+        client_type: ClientType,
     ) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
         """
         Main completion flow with pipeline integration
@@ -272,12 +277,16 @@ async def complete(
         # Dump the normalized request
         self._dump_request_response("normalized-request", normalized_request)
         streaming = normalized_request.get("stream", False)
+
+        # Get detected client if available
         input_pipeline_result = await self._run_input_pipeline(
             normalized_request,
             api_key,
             data.get("base_url"),
+            client_type,
             is_fim_request,
         )
+
         if input_pipeline_result.response and input_pipeline_result.context:
             return await self._pipeline_response_formatter.handle_pipeline_response(
                 input_pipeline_result.response, streaming, context=input_pipeline_result.context

src/codegate/providers/copilot/pipeline.py

Lines changed: 3 additions & 2 deletions
@@ -8,6 +8,7 @@
 from litellm.types.llms.openai import ChatCompletionRequest
 from litellm.types.utils import Delta, StreamingChoices
 
+from codegate.clients.clients import ClientType
 from codegate.pipeline.base import PipelineContext, PipelineResult, SequentialPipelineProcessor
 from codegate.pipeline.factory import PipelineFactory
 from codegate.providers.normalizer.completion import CompletionNormalizer
@@ -200,7 +201,7 @@ def _create_normalizer(self):
         return CopilotFimNormalizer()
 
     def _create_pipeline(self) -> SequentialPipelineProcessor:
-        return self.pipeline_factory.create_fim_pipeline()
+        return self.pipeline_factory.create_fim_pipeline(ClientType.COPILOT)
 
 
 class CopilotChatPipeline(CopilotPipeline):
@@ -216,4 +217,4 @@ def _create_normalizer(self):
         return CopilotChatNormalizer()
 
     def _create_pipeline(self) -> SequentialPipelineProcessor:
-        return self.pipeline_factory.create_input_pipeline()
+        return self.pipeline_factory.create_input_pipeline(ClientType.COPILOT)

src/codegate/providers/llamacpp/provider.py

Lines changed: 3 additions & 1 deletion
@@ -44,7 +44,9 @@ async def process_request(
     ):
         is_fim_request = self._is_fim_request(request_url_path, data)
         try:
-            stream = await self.complete(data, None, is_fim_request=is_fim_request)
+            stream = await self.complete(
+                data, None, is_fim_request=is_fim_request, client_type=client_type
+            )
         except RuntimeError as e:
             # propagate as error 500
             logger.error("Error in LlamaCppProvider completion", error=str(e))

src/codegate/providers/ollama/provider.py

Lines changed: 6 additions & 1 deletion
@@ -66,7 +66,12 @@ async def process_request(
     ):
         is_fim_request = self._is_fim_request(request_url_path, data)
         try:
-            stream = await self.complete(data, api_key=None, is_fim_request=is_fim_request)
+            stream = await self.complete(
+                data,
+                api_key=None,
+                is_fim_request=is_fim_request,
+                client_type=client_type,
+            )
         except httpx.ConnectError as e:
             logger.error("Error in OllamaProvider completion", error=str(e))
             raise HTTPException(status_code=503, detail="Ollama service is unavailable")

src/codegate/providers/openai/provider.py

Lines changed: 6 additions & 1 deletion
@@ -54,7 +54,12 @@ async def process_request(
         is_fim_request = self._is_fim_request(request_url_path, data)
 
         try:
-            stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            stream = await self.complete(
+                data,
+                api_key,
+                is_fim_request=is_fim_request,
+                client_type=client_type,
+            )
         except Exception as e:
             # check if we have an status code there
             if hasattr(e, "status_code"):

src/codegate/providers/vllm/provider.py

Lines changed: 6 additions & 1 deletion
@@ -77,7 +77,12 @@ async def process_request(
         is_fim_request = self._is_fim_request(request_url_path, data)
         try:
             # Pass the potentially None api_key to complete
-            stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
+            stream = await self.complete(
+                data,
+                api_key,
+                is_fim_request=is_fim_request,
+                client_type=client_type,
+            )
         except Exception as e:
             # Check if we have a status code there
             if hasattr(e, "status_code"):
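
All five provider diffs above follow the same pattern: process_request already receives the detected client_type and now forwards it to complete() instead of letting the pipeline re-detect the client from the messages. A hypothetical new provider would do the same; the sketch below assumes the arguments visible in the diffs and that _is_fim_request and complete come from the shared provider base class in providers/base.py.

    from typing import Any, Dict, Optional

    from codegate.clients.clients import ClientType


    class MyProvider:  # hypothetical provider, not part of this commit
        # In real code this would subclass the provider base class in providers/base.py.
        async def process_request(
            self,
            data: Dict[str, Any],
            api_key: Optional[str],
            request_url_path: str,
            client_type: ClientType,
        ):
            is_fim_request = self._is_fim_request(request_url_path, data)
            # Forward the already-detected client type to the completion flow.
            return await self.complete(
                data,
                api_key,
                is_fim_request=is_fim_request,
                client_type=client_type,
            )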
