This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Support llamacpp provider in muxing #889

Merged
merged 1 commit into from Feb 4, 2025
1 change: 1 addition & 0 deletions src/codegate/config.py
@@ -21,6 +21,7 @@
"vllm": "http://localhost:8000", # Base URL without /v1 path
"ollama": "http://localhost:11434", # Default Ollama server URL
"lm_studio": "http://localhost:1234",
"llamacpp": "./codegate_volume/models", # Default LlamaCpp model path
}


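Unlike the other defaults, the llamacpp entry is a filesystem path rather than an HTTP URL: the provider loads GGUF files from disk instead of proxying to a server. A minimal sketch of how a model file path can be derived from this default (the helper and model names are illustrative, not part of this diff):

from pathlib import Path

def resolve_model_file(base_path: str, model: str) -> Path:
    # "./codegate_volume/models" + "qwen2.5-coder-1.5b" ->
    # ./codegate_volume/models/qwen2.5-coder-1.5b.gguf
    return Path(base_path) / f"{model}.gguf"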
2 changes: 2 additions & 0 deletions src/codegate/muxing/adapter.py
@@ -104,6 +104,8 @@ def __init__(self):
             db_models.ProviderType.ollama: self._format_ollama,
             db_models.ProviderType.openai: self._format_openai,
             db_models.ProviderType.anthropic: self._format_antropic,
+            # Our llamacpp provider emits OpenAI chunks
+            db_models.ProviderType.llamacpp: self._format_openai,
         }

     def _format_ollama(self, chunk: str) -> str:
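Because the llamacpp provider already yields OpenAI-shaped streaming chunks, the mux adapter can reuse _format_openai rather than grow a new formatter. To illustrate the chunk shape being assumed (the payload is made up, and _format_openai's real body is not shown in this diff):

import json

# An OpenAI-style SSE chunk, as the llamacpp provider emits it.
raw_chunk = 'data: {"choices": [{"delta": {"content": "Hello"}}]}'

# The adapter looks the formatter up by provider type and applies it;
# an OpenAI-style formatter only needs to parse this shape.
payload = json.loads(raw_chunk.removeprefix("data: "))
assert payload["choices"][0]["delta"]["content"] == "Hello"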
1 change: 1 addition & 0 deletions src/codegate/providers/crud/crud.py
@@ -365,6 +365,7 @@ def __provider_endpoint_from_cfg(


 def provider_default_endpoints(provider_type: str) -> str:
+    # TODO: These providers' default endpoints should come from config.py
     defaults = {
         "openai": "https://api.openai.com",
         "anthropic": "https://api.anthropic.com",
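The TODO flags that these literals duplicate the defaults in config.py. One possible shape for that refactor, assuming the config object exposes its default-URL mapping (the provider_urls attribute name is an assumption for illustration):

from codegate.config import Config

def provider_default_endpoints(provider_type: str) -> str:
    # Read from the single source of truth instead of repeating URLs here.
    defaults = Config.get_config().provider_urls  # attribute name assumed
    return defaults[provider_type]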
5 changes: 4 additions & 1 deletion src/codegate/providers/llamacpp/completion_handler.py
@@ -57,13 +57,16 @@ async def execute_completion(
"""
Execute the completion request with inference engine API
"""
model_path = f"{Config.get_config().model_base_path}/{request['model']}.gguf"
model_path = f"{request['base_url']}/{request['model']}.gguf"

# Create a copy of the request dict and remove stream_options
# Reason - Request error as JSON:
# {'error': "Llama.create_completion() got an unexpected keyword argument 'stream_options'"}
request_dict = dict(request)
request_dict.pop("stream_options", None)
# Remove base_url from the request dict. We use this field as a standard across
# all providers to specify the base URL of the model.
request_dict.pop("base_url", None)

if is_fim_request:
response = await self.inference_engine.complete(
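The handler now builds the GGUF path from the base_url injected by the provider (see provider.py below) and strips the keys llama-cpp-python will not accept. A standalone sketch of that preparation step, with an illustrative request body:

request = {
    "model": "qwen2.5-coder-1.5b",  # illustrative model name
    "base_url": "./codegate_volume/models",
    "stream_options": {"include_usage": True},
    "messages": [{"role": "user", "content": "hi"}],
}

model_path = f"{request['base_url']}/{request['model']}.gguf"

# Drop codegate-internal and unsupported fields before the dict reaches
# Llama.create_completion(), which rejects unknown keyword arguments.
request_dict = dict(request)
request_dict.pop("stream_options", None)
request_dict.pop("base_url", None)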
17 changes: 14 additions & 3 deletions src/codegate/providers/llamacpp/provider.py
@@ -1,11 +1,13 @@
 import json
+from pathlib import Path
 from typing import List

 import structlog
 from fastapi import HTTPException, Request

+from codegate.config import Config
 from codegate.pipeline.factory import PipelineFactory
-from codegate.providers.base import BaseProvider
+from codegate.providers.base import BaseProvider, ModelFetchError
 from codegate.providers.llamacpp.completion_handler import LlamaCppCompletionHandler
 from codegate.providers.llamacpp.normalizer import LLamaCppInputNormalizer, LLamaCppOutputNormalizer

@@ -30,8 +32,16 @@ def provider_route_name(self) -> str:
return "llamacpp"

def models(self, endpoint: str = None, api_key: str = None) -> List[str]:
# TODO: Implement file fetching
return []
models_path = Path(Config.get_config().model_base_path)
if not models_path.is_dir():
raise ModelFetchError(f"llamacpp model path does not exist: {models_path}")

# return all models except the all-minilm-L6-v2-q5_k_m model which we use for embeddings
return [
model.stem
for model in models_path.glob("*.gguf")
if model.is_file() and model.stem != "all-minilm-L6-v2-q5_k_m"
]

async def process_request(self, data: dict, api_key: str, request_url_path: str):
is_fim_request = self._is_fim_request(request_url_path, data)
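models() now reports every GGUF file in the configured directory by its file stem, filtering out the embeddings model. A quick illustration of the glob behavior, with made-up file names:

from pathlib import Path

# Given ./codegate_volume/models containing (illustrative):
#   qwen2.5-coder-1.5b.gguf
#   deepseek-r1-distill-qwen-1.5b.gguf
#   all-minilm-L6-v2-q5_k_m.gguf   (embeddings model, excluded)
models_path = Path("./codegate_volume/models")
names = [
    p.stem  # "qwen2.5-coder-1.5b.gguf" -> "qwen2.5-coder-1.5b"
    for p in models_path.glob("*.gguf")
    if p.is_file() and p.stem != "all-minilm-L6-v2-q5_k_m"
]
# e.g. names == ["qwen2.5-coder-1.5b", "deepseek-r1-distill-qwen-1.5b"]
# (glob order is filesystem-dependent)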
@@ -66,5 +76,6 @@ async def create_completion(
     ):
         body = await request.body()
         data = json.loads(body)
+        data["base_url"] = Config.get_config().model_base_path

         return await self.process_request(data, None, request.url.path)
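Injecting the model directory as base_url server-side is what ties the pieces together: clients never send the field, and the muxing layer and completion handler can treat llamacpp like any HTTP provider even though its "base URL" is a local directory. A condensed sketch of the round trip (request body illustrative):

import json

# 1. Provider: parse the body and attach the configured model directory.
data = json.loads('{"model": "qwen2.5-coder-1.5b", "messages": []}')
data["base_url"] = "./codegate_volume/models"  # from Config.get_config().model_base_path

# 2. Completion handler: derive the on-disk path, then drop the helper key
#    before the dict is handed to the inference engine.
model_path = f"{data['base_url']}/{data['model']}.gguf"
data.pop("base_url", None)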