Merge branch 'main' into vllm-integration-tests

rdimitrov · web-flow · commit 39b8a6b4a826 · 2025-01-29T12:30:33.000+02:00
diff --git a/.github/workflows/update-model-costs.yml b/.github/workflows/update-model-costs.yml
@@ -3,6 +3,7 @@ name: Update model prices and context window JSON file
 
 on:
   workflow_call:
+  workflow_dispatch:
   schedule:
     - cron: '0 2 * * 0'   # Run every Sunday at 2:00 AM
 
diff --git a/api/openapi.json b/api/openapi.json
@@ -1009,7 +1009,7 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/TokenUsage"
+                  "$ref": "#/components/schemas/TokenUsageAggregate"
                 }
               }
             }
@@ -1233,10 +1233,10 @@
             "format": "date-time",
             "title": "Conversation Timestamp"
           },
-          "token_usage": {
+          "token_usage_agg": {
             "anyOf": [
               {
-                "$ref": "#/components/schemas/TokenUsage"
+                "$ref": "#/components/schemas/TokenUsageAggregate"
               },
               {
                 "type": "null"
@@ -1251,7 +1251,7 @@
           "type",
           "chat_id",
           "conversation_timestamp",
-          "token_usage"
+          "token_usage_agg"
         ],
         "title": "Conversation",
         "description": "Represents a conversation."
@@ -1467,7 +1467,8 @@
           "anthropic",
           "vllm",
           "ollama",
-          "lm_studio"
+          "lm_studio",
+          "llamacpp"
         ],
         "title": "ProviderType",
         "description": "Represents the different types of providers we support."
@@ -1505,25 +1506,51 @@
         "title": "QuestionType"
       },
       "TokenUsage": {
+        "properties": {
+          "input_tokens": {
+            "type": "integer",
+            "title": "Input Tokens",
+            "default": 0
+          },
+          "output_tokens": {
+            "type": "integer",
+            "title": "Output Tokens",
+            "default": 0
+          },
+          "input_cost": {
+            "type": "number",
+            "title": "Input Cost",
+            "default": 0
+          },
+          "output_cost": {
+            "type": "number",
+            "title": "Output Cost",
+            "default": 0
+          }
+        },
+        "type": "object",
+        "title": "TokenUsage",
+        "description": "TokenUsage is not a table; it is a model that represents token usage.\nThe data is stored in the outputs table."
+      },
+      "TokenUsageAggregate": {
         "properties": {
           "tokens_by_model": {
-            "items": {
+            "additionalProperties": {
               "$ref": "#/components/schemas/TokenUsageByModel"
             },
-            "type": "array",
+            "type": "object",
             "title": "Tokens By Model"
           },
-          "used_tokens": {
-            "type": "integer",
-            "title": "Used Tokens"
+          "token_usage": {
+            "$ref": "#/components/schemas/TokenUsage"
           }
         },
         "type": "object",
         "required": [
           "tokens_by_model",
-          "used_tokens"
+          "token_usage"
         ],
-        "title": "TokenUsage",
+        "title": "TokenUsageAggregate",
         "description": "Represents the tokens used. Includes the information of the tokens used by model.\n`used_tokens` are the total tokens used in the `tokens_by_model` list."
       },
       "TokenUsageByModel": {
@@ -1535,16 +1562,15 @@
             "type": "string",
             "title": "Model"
           },
-          "used_tokens": {
-            "type": "integer",
-            "title": "Used Tokens"
+          "token_usage": {
+            "$ref": "#/components/schemas/TokenUsage"
           }
         },
         "type": "object",
         "required": [
           "provider_type",
           "model",
-          "used_tokens"
+          "token_usage"
         ],
         "title": "TokenUsageByModel",
         "description": "Represents the tokens used by a model."
diff --git a/model_cost_data/model_prices_and_context_window.json b/model_cost_data/model_prices_and_context_window.json
@@ -4608,6 +4608,20 @@
         "litellm_provider": "replicate",
         "mode": "chat"
     },
+    "openrouter/deepseek/deepseek-r1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 64000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000055,
+        "input_cost_per_token_cache_hit": 0.00000014,
+        "output_cost_per_token": 0.00000219,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true, 
+        "supports_assistant_prefill": true,
+        "supports_tool_choice": true,
+        "supports_prompt_caching": true
+    },
     "openrouter/deepseek/deepseek-chat": {
         "max_tokens": 8192,
         "max_input_tokens": 66000,
@@ -5210,6 +5224,24 @@
         "mode": "chat",
         "supports_system_messages": true
     },
+    "ai21.jamba-1-5-large-v1:0": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000008,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "ai21.jamba-1-5-mini-v1:0": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000002,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "amazon.titan-text-lite-v1": {
         "max_tokens": 4000, 
         "max_input_tokens": 42000,
@@ -5528,8 +5560,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
@@ -5598,8 +5630,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
@@ -5668,8 +5700,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000125,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
@@ -6043,8 +6075,8 @@
         "max_tokens": 8191, 
         "max_input_tokens": 100000, 
         "max_output_tokens": 8191,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.0000024,
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
diff --git a/src/codegate/api/v1.py b/src/codegate/api/v1.py
@@ -128,6 +128,27 @@ async def add_provider_endpoint(
     return provend
 
 
+@v1.put(
+    "/provider-endpoints/{provider_id}/auth-material",
+    tags=["Providers"],
+    generate_unique_id_function=uniq_name,
+    status_code=204,
+)
+async def configure_auth_material(
+    provider_id: UUID,
+    request: v1_models.ConfigureAuthMaterial,
+):
+    """Configure auth material for a provider."""
+    try:
+        await pcrud.configure_auth_material(provider_id, request)
+    except provendcrud.ProviderNotFoundError:
+        raise HTTPException(status_code=404, detail="Provider endpoint not found")
+    except Exception:
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+    return Response(status_code=204)
+
+
 @v1.put(
     "/provider-endpoints/{provider_id}", tags=["Providers"], generate_unique_id_function=uniq_name
 )
diff --git a/src/codegate/api/v1_models.py b/src/codegate/api/v1_models.py
@@ -223,7 +223,7 @@ class ProviderEndpoint(pydantic.BaseModel):
     description: str = ""
     provider_type: ProviderType
     endpoint: str
-    auth_type: ProviderAuthType
+    auth_type: Optional[ProviderAuthType] = ProviderAuthType.none
 
     @staticmethod
     def from_db_model(db_model: db_models.ProviderEndpoint) -> "ProviderEndpoint":
@@ -250,6 +250,15 @@ def get_from_registry(self, registry: ProviderRegistry) -> Optional[BaseProvider
         return registry.get_provider(self.provider_type)
 
 
+class ConfigureAuthMaterial(pydantic.BaseModel):
+    """
+    Represents a request to configure auth material for a provider.
+    """
+
+    auth_type: ProviderAuthType
+    api_key: Optional[str] = None
+
+
 class ModelByProvider(pydantic.BaseModel):
     """
     Represents a model supported by a provider.
diff --git a/src/codegate/db/connection.py b/src/codegate/db/connection.py
@@ -441,8 +441,10 @@ async def push_provider_auth_material(self, auth_material: ProviderAuthMaterial)
             UPDATE provider_endpoints
             SET auth_type = :auth_type, auth_blob = :auth_blob
             WHERE id = :provider_endpoint_id
+            RETURNING id as provider_endpoint_id, auth_type, auth_blob
             """
         )
+        # The row produced by RETURNING is deliberately discarded; this method returns nothing.
         _ = await self._execute_update_pydantic_model(auth_material, sql, should_raise=True)
         return
 
diff --git a/src/codegate/providers/crud/crud.py b/src/codegate/providers/crud/crud.py
@@ -81,6 +81,27 @@ async def update_endpoint(
         dbendpoint = await self._db_writer.update_provider_endpoint(endpoint.to_db_model())
         return apimodelsv1.ProviderEndpoint.from_db_model(dbendpoint)
 
+    async def configure_auth_material(
+        self, provider_id: UUID, config: apimodelsv1.ConfigureAuthMaterial
+    ):
+        """Validate and store auth material (auth type, API key) for a provider endpoint."""
+        if config.auth_type == apimodelsv1.ProviderAuthType.api_key and not config.api_key:
+            raise ValueError("API key must be provided for API auth type")
+        elif config.auth_type != apimodelsv1.ProviderAuthType.api_key and config.api_key:
+            raise ValueError("API key provided for non-API auth type")
+
+        dbendpoint = await self._db_reader.get_provider_endpoint_by_id(str(provider_id))
+        if dbendpoint is None:
+            raise ProviderNotFoundError("Provider not found")
+
+        await self._db_writer.push_provider_auth_material(
+            dbmodels.ProviderAuthMaterial(
+                provider_endpoint_id=dbendpoint.id,
+                auth_type=config.auth_type,
+                auth_blob=config.api_key if config.api_key else "",
+            )
+        )
+
     async def delete_endpoint(self, provider_id: UUID):
         """Delete an endpoint."""