@@ -105,41 +105,28 @@ def _has_chat_ml_format(data: Dict) -> bool:
         return False
 
     def normalize(self, data: Dict) -> ChatCompletionRequest:
-        """
-        Normalize the input data to the format expected by LiteLLM.
-        Ensures the model name has the hosted_vllm prefix and constructs the complete endpoint URL.
-        """
-        # Make a copy of the data to avoid modifying the original and normalize the message content
         normalized_data = self._normalize_content_messages(data)
 
-        # Format the model name to include the provider
+        # Format the model name
         if "model" in normalized_data:
             model_name = normalized_data["model"]
             if not model_name.startswith("hosted_vllm/"):
                 normalized_data["model"] = f"hosted_vllm/{model_name}"
 
-        # Construct the complete endpoint URL
+        # Construct the complete endpoint URL without duplicating paths
        if "base_url" in normalized_data:
             base_url = normalized_data["base_url"].rstrip("/")
-            original_endpoint = normalized_data.pop("original_endpoint", "")
-
-            # Ensure we have /v1 in the path
-            if not base_url.endswith("/v1"):
-                base_url = f"{base_url}/v1"
-
-            # Add the original endpoint if it exists
-            if original_endpoint:
-                normalized_data["base_url"] = f"{base_url}/{original_endpoint}"
-            else:
-                normalized_data["base_url"] = base_url
+            normalized_data["base_url"] = base_url
 
         ret_data = normalized_data
         if self._has_chat_ml_format(normalized_data):
             ret_data = self._chat_ml_normalizer.normalize(normalized_data)
         else:
             ret_data = ChatCompletionRequest(**normalized_data)
+
         if ret_data.get("stream", False):
             ret_data["stream_options"] = {"include_usage": True}
+
         return ret_data
 
     def denormalize(self, data: ChatCompletionRequest) -> Dict:
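
For reference, a minimal sketch of what the new normalize behavior means for callers. This is not part of the diff: `provider`, the model name, and the URL are hypothetical, assuming `provider` is an instance of the normalizer class above and the input is a plain (non-ChatML) request dict.

# Hypothetical caller-side check; base_url is now used as given (minus any
# trailing slash) instead of having "/v1" and an original_endpoint re-appended.
request = provider.normalize({
    "model": "qwen2.5-coder",
    "base_url": "http://localhost:8000/v1/",
    "messages": [{"role": "user", "content": "hello"}],
    "stream": True,
})

assert request["model"] == "hosted_vllm/qwen2.5-coder"        # provider prefix added once
assert request["base_url"] == "http://localhost:8000/v1"      # only the trailing slash is stripped
assert request["stream_options"] == {"include_usage": True}   # set for streaming requests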