This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 972b498

Remove the endpoint suffix fixes
Signed-off-by: Radoslav Dimitrov <[email protected]>
1 parent ff72833 commit 972b498

File tree

2 files changed (+7, -25 lines)


src/codegate/providers/vllm/adapter.py

Lines changed: 6 additions & 8 deletions

@@ -105,28 +105,26 @@ def _has_chat_ml_format(data: Dict) -> bool:
         return False
 
     def normalize(self, data: Dict) -> ChatCompletionRequest:
+        """
+        Normalize the input data to the format expected by LiteLLM.
+        Ensures the model name has the hosted_vllm prefix and base_url has /v1.
+        """
+        # Make a copy of the data to avoid modifying the original and normalize the message content
         normalized_data = self._normalize_content_messages(data)
 
-        # Format the model name
+        # Format the model name to include the provider
         if "model" in normalized_data:
             model_name = normalized_data["model"]
             if not model_name.startswith("hosted_vllm/"):
                 normalized_data["model"] = f"hosted_vllm/{model_name}"
 
-        # Construct the complete endpoint URL without duplicating paths
-        if "base_url" in normalized_data:
-            base_url = normalized_data["base_url"].rstrip("/")
-            normalized_data["base_url"] = base_url
-
         ret_data = normalized_data
         if self._has_chat_ml_format(normalized_data):
             ret_data = self._chat_ml_normalizer.normalize(normalized_data)
         else:
             ret_data = ChatCompletionRequest(**normalized_data)
-
         if ret_data.get("stream", False):
             ret_data["stream_options"] = {"include_usage": True}
-
         return ret_data
 
     def denormalize(self, data: ChatCompletionRequest) -> Dict:
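
For reference, a reconstruction of the normalize method as it stands in adapter.py after this commit, assembled from the hunk above. It assumes the surrounding adapter class and its helpers (_normalize_content_messages, _has_chat_ml_format, _chat_ml_normalizer) plus the Dict and ChatCompletionRequest imports, none of which are shown in this diff:

    def normalize(self, data: Dict) -> ChatCompletionRequest:
        """
        Normalize the input data to the format expected by LiteLLM.
        Ensures the model name has the hosted_vllm prefix and base_url has /v1.
        """
        # Make a copy of the data to avoid modifying the original and normalize the message content
        normalized_data = self._normalize_content_messages(data)

        # Format the model name to include the provider
        if "model" in normalized_data:
            model_name = normalized_data["model"]
            if not model_name.startswith("hosted_vllm/"):
                normalized_data["model"] = f"hosted_vllm/{model_name}"

        # The base_url rstrip("/") handling that used to live here was removed by this commit
        ret_data = normalized_data
        if self._has_chat_ml_format(normalized_data):
            ret_data = self._chat_ml_normalizer.normalize(normalized_data)
        else:
            ret_data = ChatCompletionRequest(**normalized_data)
        if ret_data.get("stream", False):
            ret_data["stream_options"] = {"include_usage": True}
        return ret_data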

src/codegate/providers/vllm/provider.py

Lines changed: 1 addition & 17 deletions

@@ -45,20 +45,6 @@ def _get_base_url(self) -> str:
         base_url = f"{base_url}/v1"
         return base_url
 
-    def _get_endpoint_from_request(self, request: Request) -> str:
-        """
-        Extract the endpoint path from the request
-        """
-        path_parts = request.url.path.split("/")
-        # Find the index of 'vllm' in the path
-        try:
-            vllm_index = path_parts.index(self.provider_route_name)
-            # Get everything after 'vllm' but before any duplicates
-            endpoint = path_parts[vllm_index + 1].split("/")[0]  # Take just the first part
-            return endpoint
-        except ValueError:
-            return ""
-
     def models(self):
         resp = httpx.get(f"{self.base_url}/v1/models")
         jsonresp = resp.json()
@@ -117,11 +103,9 @@ async def create_completion(
         body = await request.body()
         data = json.loads(body)
 
-        # Add the vLLM base URL and original endpoint to the request
+        # Add the vLLM base URL to the request
        base_url = self._get_base_url()
-        endpoint = self._get_endpoint_from_request(request)
         data["base_url"] = base_url
-        data["original_endpoint"] = endpoint
 
         is_fim_request = self._is_fim_request(request, data)
         try:
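
Taken together, the only thing create_completion in provider.py now attaches to the incoming payload is the vLLM base URL; the _get_endpoint_from_request helper is deleted outright, so data["original_endpoint"] is no longer set anywhere. A reconstruction of the affected stretch after this commit, assuming the surrounding request handling, _get_base_url, and _is_fim_request remain as shown in the context lines:

        body = await request.body()
        data = json.loads(body)

        # Add the vLLM base URL to the request (the endpoint suffix is no longer extracted or stored)
        base_url = self._get_base_url()
        data["base_url"] = base_url

        is_fim_request = self._is_fim_request(request, data)
        try:
            ...  # the rest of the handler is unchanged and not part of this diff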

0 commit comments
