diff --git a/src/codegate/config.py b/src/codegate/config.py
index 43bd5db1..fb4d08bf 100644
--- a/src/codegate/config.py
+++ b/src/codegate/config.py
@@ -21,6 +21,7 @@
     "vllm": "http://localhost:8000",  # Base URL without /v1 path
     "ollama": "http://localhost:11434",  # Default Ollama server URL
     "lm_studio": "http://localhost:1234",
+    "llamacpp": "./codegate_volume/models",  # Default LlamaCpp model path
 }
diff --git a/src/codegate/muxing/adapter.py b/src/codegate/muxing/adapter.py
index 9a5aa111..3a2d74ca 100644
--- a/src/codegate/muxing/adapter.py
+++ b/src/codegate/muxing/adapter.py
@@ -104,6 +104,8 @@ def __init__(self):
             db_models.ProviderType.ollama: self._format_ollama,
             db_models.ProviderType.openai: self._format_openai,
             db_models.ProviderType.anthropic: self._format_antropic,
+            # Our llamacpp provider emits OpenAI chunks
+            db_models.ProviderType.llamacpp: self._format_openai,
         }
 
     def _format_ollama(self, chunk: str) -> str:
diff --git a/src/codegate/providers/crud/crud.py b/src/codegate/providers/crud/crud.py
index d09f2cf5..cb879f00 100644
--- a/src/codegate/providers/crud/crud.py
+++ b/src/codegate/providers/crud/crud.py
@@ -365,6 +365,7 @@ def __provider_endpoint_from_cfg(
 
 
 def provider_default_endpoints(provider_type: str) -> str:
+    # TODO: These providers' default endpoints should come from config.py
     defaults = {
         "openai": "https://api.openai.com",
         "anthropic": "https://api.anthropic.com",
diff --git a/src/codegate/providers/llamacpp/completion_handler.py b/src/codegate/providers/llamacpp/completion_handler.py
index 9460bf93..2d1147d6 100644
--- a/src/codegate/providers/llamacpp/completion_handler.py
+++ b/src/codegate/providers/llamacpp/completion_handler.py
@@ -57,13 +57,16 @@ async def execute_completion(
         """
         Execute the completion request with inference engine API
         """
-        model_path = f"{Config.get_config().model_base_path}/{request['model']}.gguf"
+        model_path = f"{request['base_url']}/{request['model']}.gguf"
 
         # Create a copy of the request dict and remove stream_options
         # Reason - Request error as JSON:
         # {'error': "Llama.create_completion() got an unexpected keyword argument 'stream_options'"}
         request_dict = dict(request)
         request_dict.pop("stream_options", None)
+        # Remove base_url from the request dict. We use this field as a standard across
+        # all providers to specify the base URL of the model.
+        request_dict.pop("base_url", None)
 
         if is_fim_request:
             response = await self.inference_engine.complete(
diff --git a/src/codegate/providers/llamacpp/provider.py b/src/codegate/providers/llamacpp/provider.py
index 69413fcd..0cfe6c3a 100644
--- a/src/codegate/providers/llamacpp/provider.py
+++ b/src/codegate/providers/llamacpp/provider.py
@@ -1,11 +1,13 @@
 import json
+from pathlib import Path
 from typing import List
 
 import structlog
 from fastapi import HTTPException, Request
 
+from codegate.config import Config
 from codegate.pipeline.factory import PipelineFactory
-from codegate.providers.base import BaseProvider
+from codegate.providers.base import BaseProvider, ModelFetchError
 from codegate.providers.llamacpp.completion_handler import LlamaCppCompletionHandler
 from codegate.providers.llamacpp.normalizer import LLamaCppInputNormalizer, LLamaCppOutputNormalizer
 
@@ -30,8 +32,16 @@ def provider_route_name(self) -> str:
         return "llamacpp"
 
     def models(self, endpoint: str = None, api_key: str = None) -> List[str]:
-        # TODO: Implement file fetching
-        return []
+        models_path = Path(Config.get_config().model_base_path)
+        if not models_path.is_dir():
+            raise ModelFetchError(f"llamacpp model path does not exist: {models_path}")
+
+        # Return all models except the all-minilm-L6-v2-q5_k_m model, which we use for embeddings
+        return [
+            model.stem
+            for model in models_path.glob("*.gguf")
+            if model.is_file() and model.stem != "all-minilm-L6-v2-q5_k_m"
+        ]
 
     async def process_request(self, data: dict, api_key: str, request_url_path: str):
         is_fim_request = self._is_fim_request(request_url_path, data)
@@ -66,5 +76,6 @@ async def create_completion(
     ):
         body = await request.body()
         data = json.loads(body)
+        data["base_url"] = Config.get_config().model_base_path
 
         return await self.process_request(data, None, request.url.path)