Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 380ec74

Browse files
committed
Create the pipelines only once in the copilot provider
Since the copilot provider class instance is created once per connection, let's create the pipelines when establishing the connection and reuse them.
1 parent 767a465 commit 380ec74

File tree

2 files changed

+32
-11
lines changed

2 files changed

+32
-11
lines changed

src/codegate/providers/copilot/pipeline.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class CopilotPipeline(ABC):
2424

2525
def __init__(self, pipeline_factory: PipelineFactory):
2626
self.pipeline_factory = pipeline_factory
27+
self.instance = self._create_pipeline()
2728
self.normalizer = self._create_normalizer()
2829
self.provider_name = "openai"
2930

@@ -33,7 +34,7 @@ def _create_normalizer(self):
3334
pass
3435

3536
@abstractmethod
36-
def create_pipeline(self) -> SequentialPipelineProcessor:
37+
def _create_pipeline(self) -> SequentialPipelineProcessor:
3738
"""Each strategy defines which pipeline to create"""
3839
pass
3940

@@ -84,7 +85,11 @@ def _create_shortcut_response(result: PipelineResult, model: str) -> bytes:
8485
body = response.model_dump_json(exclude_none=True, exclude_unset=True).encode()
8586
return body
8687

87-
async def process_body(self, headers: list[str], body: bytes) -> Tuple[bytes, PipelineContext]:
88+
async def process_body(
89+
self,
90+
headers: list[str],
91+
body: bytes,
92+
) -> Tuple[bytes, PipelineContext | None]:
8893
"""Common processing logic for all strategies"""
8994
try:
9095
normalized_body = self.normalizer.normalize(body)
@@ -97,8 +102,7 @@ async def process_body(self, headers: list[str], body: bytes) -> Tuple[bytes, Pi
97102
except ValueError:
98103
continue
99104

100-
pipeline = self.create_pipeline()
101-
result = await pipeline.process_request(
105+
result = await self.instance.process_request(
102106
request=normalized_body,
103107
provider=self.provider_name,
104108
model=normalized_body.get("model", "gpt-4o-mini"),
@@ -168,10 +172,13 @@ class CopilotFimPipeline(CopilotPipeline):
168172
format and the FIM pipeline used by all providers.
169173
"""
170174

175+
def __init__(self, pipeline_factory: PipelineFactory):
176+
super().__init__(pipeline_factory)
177+
171178
def _create_normalizer(self):
172179
return CopilotFimNormalizer()
173180

174-
def create_pipeline(self) -> SequentialPipelineProcessor:
181+
def _create_pipeline(self) -> SequentialPipelineProcessor:
175182
return self.pipeline_factory.create_fim_pipeline()
176183

177184

@@ -181,8 +188,11 @@ class CopilotChatPipeline(CopilotPipeline):
181188
format and the chat pipeline used by all providers.
182189
"""
183190

191+
def __init__(self, pipeline_factory: PipelineFactory):
192+
super().__init__(pipeline_factory)
193+
184194
def _create_normalizer(self):
185195
return CopilotChatNormalizer()
186196

187-
def create_pipeline(self) -> SequentialPipelineProcessor:
197+
def _create_pipeline(self) -> SequentialPipelineProcessor:
188198
return self.pipeline_factory.create_input_pipeline()

src/codegate/providers/copilot/provider.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,16 @@ def __init__(self, loop: asyncio.AbstractEventLoop):
150150
self.cert_manager = TLSCertDomainManager(self.ca)
151151
self._closing = False
152152
self.pipeline_factory = PipelineFactory(SecretsManager())
153+
self.input_pipeline: Optional[CopilotPipeline] = None
154+
self.fim_pipeline: Optional[CopilotPipeline] = None
155+
# the context as provided by the pipeline
153156
self.context_tracking: Optional[PipelineContext] = None
154157

158+
def _ensure_pipelines(self):
159+
if not self.input_pipeline or not self.fim_pipeline:
160+
self.input_pipeline = CopilotChatPipeline(self.pipeline_factory)
161+
self.fim_pipeline = CopilotFimPipeline(self.pipeline_factory)
162+
155163
def _select_pipeline(self, method: str, path: str) -> Optional[CopilotPipeline]:
156164
if method != "POST":
157165
logger.debug("Not a POST request, no pipeline selected")
@@ -161,10 +169,10 @@ def _select_pipeline(self, method: str, path: str) -> Optional[CopilotPipeline]:
161169
if path == route.path:
162170
if route.pipeline_type == PipelineType.FIM:
163171
logger.debug("Selected FIM pipeline")
164-
return CopilotFimPipeline(self.pipeline_factory)
172+
return self.fim_pipeline
165173
elif route.pipeline_type == PipelineType.CHAT:
166174
logger.debug("Selected CHAT pipeline")
167-
return CopilotChatPipeline(self.pipeline_factory)
175+
return self.input_pipeline
168176

169177
logger.debug("No pipeline selected")
170178
return None
@@ -181,7 +189,6 @@ async def _body_through_pipeline(
181189
# if we didn't select any strategy that would change the request
182190
# let's just pass through the body as-is
183191
return body, None
184-
logger.debug(f"Processing body through pipeline: {len(body)} bytes")
185192
return await strategy.process_body(headers, body)
186193

187194
async def _request_to_target(self, headers: list[str], body: bytes):
@@ -288,6 +295,9 @@ async def _forward_data_through_pipeline(self, data: bytes) -> Union[HttpRequest
288295
http_request.headers,
289296
http_request.body,
290297
)
298+
# TODO: it's weird that we're overwriting the context.
299+
# Should we set the context once? Maybe when
300+
# creating the pipeline instance?
291301
self.context_tracking = context
292302

293303
if context and context.shortcut_response:
@@ -431,7 +441,6 @@ def data_received(self, data: bytes) -> None:
431441
Handle received data from client. Since we need to process the complete body
432442
through our pipeline before forwarding, we accumulate the entire request first.
433443
"""
434-
logger.info(f"Received data from {self.peername}: {data}")
435444
try:
436445
if not self._check_buffer_size(data):
437446
self.send_error_response(413, b"Request body too large")
@@ -442,6 +451,7 @@ def data_received(self, data: bytes) -> None:
442451
if not self.headers_parsed:
443452
self.headers_parsed = self.parse_headers()
444453
if self.headers_parsed:
454+
self._ensure_pipelines()
445455
if self.request.method == "CONNECT":
446456
self.handle_connect()
447457
self.buffer.clear()
@@ -452,7 +462,6 @@ def data_received(self, data: bytes) -> None:
452462
if self._has_complete_body():
453463
# Process the complete request through the pipeline
454464
complete_request = bytes(self.buffer)
455-
logger.debug(f"Complete request: {complete_request}")
456465
self.buffer.clear()
457466
asyncio.create_task(self._forward_data_to_target(complete_request))
458467

@@ -756,10 +765,12 @@ def connection_made(self, transport: asyncio.Transport) -> None:
756765

757766
def _ensure_output_processor(self) -> None:
758767
if self.proxy.context_tracking is None:
768+
logger.debug("No context tracking, no need to process pipeline")
759769
# No context tracking, no need to process pipeline
760770
return
761771

762772
if self.sse_processor is not None:
773+
logger.debug("Already initialized, no need to reinitialize")
763774
# Already initialized, no need to reinitialize
764775
return
765776

0 commit comments

Comments (0)