# Copyright (c) Microsoft. All rights reserved.

import json
from collections.abc import (
    AsyncIterable,
    Callable,
    Mapping,
    MutableMapping,
    MutableSequence,
    Sequence,
)
from itertools import chain
from typing import Any, ClassVar

from agent_framework import (
    AIFunction,
    BaseChatClient,
    ChatMessage,
    ChatOptions,
    ChatResponse,
    ChatResponseUpdate,
    Contents,
    DataContent,
    FunctionCallContent,
    FunctionResultContent,
    Role,
    TextContent,
    TextReasoningContent,
    ToolProtocol,
    UsageDetails,
    get_logger,
    use_chat_middleware,
    use_function_invocation,
)
from agent_framework._pydantic import AFBaseSettings
from agent_framework.exceptions import (
    ServiceInitializationError,
    ServiceInvalidRequestError,
    ServiceResponseException,
)
from agent_framework.observability import use_instrumentation
from ollama import AsyncClient

# Rename imported types to avoid naming conflicts with Agent Framework types
from ollama._types import ChatResponse as OllamaChatResponse
from ollama._types import Message as OllamaMessage
from pydantic import ValidationError


class OllamaSettings(AFBaseSettings):
    """Ollama settings."""

    env_prefix: ClassVar[str] = "OLLAMA_"

    host: str | None = None
    model_id: str | None = None


logger = get_logger("agent_framework.ollama")


@use_function_invocation
@use_instrumentation
@use_chat_middleware
class OllamaChatClient(BaseChatClient):
| 66 | + """Ollama Chat completion class.""" |

    OTEL_PROVIDER_NAME: ClassVar[str] = "ollama"

    def __init__(
        self,
        *,
        host: str | None = None,
        client: AsyncClient | None = None,
        model_id: str | None = None,
        env_file_path: str | None = None,
        env_file_encoding: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize an Ollama Chat client.

        Keyword Args:
            host: Ollama server URL; if not provided, `http://localhost:11434` is used.
                Can be set via the OLLAMA_HOST env variable.
            client: An optional Ollama AsyncClient instance. If not provided, a new instance will be created.
            model_id: The Ollama chat model ID to use. Can be set via the OLLAMA_MODEL_ID env variable.
            env_file_path: An optional path to a dotenv (.env) file to load environment variables from.
            env_file_encoding: The encoding to use when reading the dotenv (.env) file. Defaults to 'utf-8'.
            **kwargs: Additional keyword arguments passed to BaseChatClient.
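
        Examples:
            A minimal construction sketch (the model name is illustrative; any
            locally pulled Ollama model works)::

                client = OllamaChatClient(model_id="llama3.2")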
| 90 | + """ |
| 91 | + try: |
| 92 | + ollama_settings = OllamaSettings( |
| 93 | + host=host, |
| 94 | + model_id=model_id, |
| 95 | + env_file_encoding=env_file_encoding, |
| 96 | + env_file_path=env_file_path, |
| 97 | + ) |
| 98 | + except ValidationError as ex: |
| 99 | + raise ServiceInitializationError("Failed to create Ollama settings.", ex) from ex |
| 100 | + |
| 101 | + if ollama_settings.model_id is None: |
| 102 | + raise ServiceInitializationError( |
| 103 | + "Ollama chat model ID must be provided via model_id or OLLAMA_MODEL_ID environment variable." |
| 104 | + ) |
| 105 | + |
| 106 | + self.model_id = ollama_settings.model_id |
| 107 | + self.client = client or AsyncClient(host=ollama_settings.host) |
| 108 | + # Save Host URL for serialization with to_dict() |
| 109 | + self.host = str(self.client._client.base_url) |
| 110 | + |
| 111 | + super().__init__(**kwargs) |
| 112 | + |
| 113 | + async def _inner_get_response( |
| 114 | + self, |
| 115 | + *, |
| 116 | + messages: MutableSequence[ChatMessage], |
| 117 | + chat_options: ChatOptions, |
| 118 | + **kwargs: Any, |
| 119 | + ) -> ChatResponse: |
| 120 | + options_dict = self._prepare_options(messages, chat_options) |
| 121 | + |
| 122 | + try: |
| 123 | + response: OllamaChatResponse = await self.client.chat( # type: ignore[misc] |
| 124 | + stream=False, |
| 125 | + **options_dict, |
| 126 | + **kwargs, |
| 127 | + ) |
| 128 | + except Exception as ex: |
            raise ServiceResponseException(f"Ollama chat request failed: {ex}", ex) from ex

        return self._ollama_response_to_agent_framework_response(response)

    async def _inner_get_streaming_response(
        self,
        *,
        messages: MutableSequence[ChatMessage],
        chat_options: ChatOptions,
        **kwargs: Any,
    ) -> AsyncIterable[ChatResponseUpdate]:
        options_dict = self._prepare_options(messages, chat_options)

        try:
            response_object: AsyncIterable[OllamaChatResponse] = await self.client.chat(  # type: ignore[misc]
                stream=True,
                **options_dict,
                **kwargs,
            )
        except Exception as ex:
            raise ServiceResponseException(f"Ollama streaming chat request failed: {ex}", ex) from ex

        async for part in response_object:
            yield self._ollama_streaming_response_to_agent_framework_response(part)

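    # Illustrative shape of the result (values are hypothetical): ChatOptions(model_id="llama3.2")
    # with one user message becomes {"model": "llama3.2", "messages": [Message(role="user", ...)]},
    # i.e. the keyword arguments expected by AsyncClient.chat.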
    def _prepare_options(self, messages: MutableSequence[ChatMessage], chat_options: ChatOptions) -> dict[str, Any]:
        # Convert chat options to a dict, excluding fields the Ollama API does not accept
        options_dict = chat_options.to_dict(exclude={"instructions", "type"})

        # Promote additional_properties to the top level of options_dict
        additional_props = options_dict.pop("additional_properties", {})
        options_dict.update(additional_props)

        # Convert messages from Agent Framework format to Ollama format
        if messages and "messages" not in options_dict:
            options_dict["messages"] = self._prepare_chat_history_for_request(messages)
        if "messages" not in options_dict:
            raise ServiceInvalidRequestError("Messages are required for chat completions")

        # Convert tools from Agent Framework format to JSON Schema format
        if chat_options.tools:
            options_dict["tools"] = self._chat_to_tool_spec(chat_options.tools)

        # Currently Ollama only supports auto tool choice
        if chat_options.tool_choice == "required":
            raise ServiceInvalidRequestError("Ollama does not support required tool choice.")
        # Always auto: remove tool_choice since Ollama does not expose configuration to force or disable tools.
        options_dict.pop("tool_choice", None)

        # Rename model_id to model for the Ollama API; if no model was provided,
        # fall back to the one from client initialization
        if "model_id" in options_dict:
            options_dict["model"] = options_dict.pop("model_id")
        else:
            options_dict["model"] = self.model_id

        return options_dict

    def _prepare_chat_history_for_request(self, messages: MutableSequence[ChatMessage]) -> list[OllamaMessage]:
        ollama_messages = [self._agent_framework_message_to_ollama_message(msg) for msg in messages]
        # Flatten the list of lists into a single list
        return list(chain.from_iterable(ollama_messages))

    def _agent_framework_message_to_ollama_message(self, message: ChatMessage) -> list[OllamaMessage]:
        message_converters: dict[str, Callable[[ChatMessage], list[OllamaMessage]]] = {
            Role.SYSTEM.value: self._format_system_message,
            Role.USER.value: self._format_user_message,
            Role.ASSISTANT.value: self._format_assistant_message,
            Role.TOOL.value: self._format_tool_message,
        }
        return message_converters[message.role.value](message)

    def _format_system_message(self, message: ChatMessage) -> list[OllamaMessage]:
        return [OllamaMessage(role="system", content=message.text)]

    def _format_user_message(self, message: ChatMessage) -> list[OllamaMessage]:
        if not any(isinstance(c, (DataContent, TextContent)) for c in message.contents) and not message.text:
            raise ServiceInvalidRequestError(
                "Ollama connector currently only supports user messages with TextContent or DataContent."
            )

        if not any(isinstance(c, DataContent) for c in message.contents):
            return [OllamaMessage(role="user", content=message.text)]

        user_message = OllamaMessage(role="user", content=message.text)
        data_contents = [c for c in message.contents if isinstance(c, DataContent)]
        if data_contents:
            if not any(c.has_top_level_media_type("image") for c in data_contents):
                raise ServiceInvalidRequestError("Only image data content is supported for user messages in Ollama.")
            # Ollama expects base64 strings without prefix
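            # e.g. "data:image/png;base64,iVBORw0..." -> "iVBORw0..." (assumes a data: URI with a base64 payload)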
            user_message["images"] = [c.uri.split(",")[1] for c in data_contents]
        return [user_message]

    def _format_assistant_message(self, message: ChatMessage) -> list[OllamaMessage]:
        text_content = message.text
        reasoning_contents = "".join(c.text for c in message.contents if isinstance(c, TextReasoningContent))

        assistant_message = OllamaMessage(role="assistant", content=text_content, thinking=reasoning_contents)

        tool_calls = [item for item in message.contents if isinstance(item, FunctionCallContent)]
        if tool_calls:
            assistant_message["tool_calls"] = [
                {
                    "function": {
                        "call_id": tool_call.call_id,
                        "name": tool_call.name,
                        "arguments": tool_call.arguments
                        if isinstance(tool_call.arguments, Mapping)
                        else json.loads(tool_call.arguments or "{}"),
                    }
                }
                for tool_call in tool_calls
            ]
        return [assistant_message]

    def _format_tool_message(self, message: ChatMessage) -> list[OllamaMessage]:
        # Ollama does not support multiple tool results in a single message, so we create a separate message per result
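        # tool_name carries the function name, which _parse_ollama_tool_calls also uses as the call_id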
        return [
            OllamaMessage(role="tool", content=str(item.result), tool_name=item.call_id)
            for item in message.contents
            if isinstance(item, FunctionResultContent)
        ]

    def _ollama_response_to_agent_framework_content(self, response: OllamaChatResponse) -> list[Contents]:
        contents: list[Contents] = []
        if response.message.thinking:
            contents.append(TextReasoningContent(text=response.message.thinking))
        if response.message.content:
            contents.append(TextContent(text=response.message.content))
        if response.message.tool_calls:
            tool_calls = self._parse_ollama_tool_calls(response.message.tool_calls)
            contents.extend(tool_calls)
        return contents

    def _ollama_streaming_response_to_agent_framework_response(
        self, response: OllamaChatResponse
    ) -> ChatResponseUpdate:
        contents = self._ollama_response_to_agent_framework_content(response)
        return ChatResponseUpdate(
            contents=contents,
            role=Role.ASSISTANT,
            ai_model_id=response.model,
            created_at=response.created_at,
        )

    def _ollama_response_to_agent_framework_response(self, response: OllamaChatResponse) -> ChatResponse:
        contents = self._ollama_response_to_agent_framework_content(response)

        return ChatResponse(
            messages=[ChatMessage(role=Role.ASSISTANT, contents=contents)],
            model_id=response.model,
            created_at=response.created_at,
            usage_details=UsageDetails(
                input_token_count=response.prompt_eval_count,
                output_token_count=response.eval_count,
            ),
        )

    def _parse_ollama_tool_calls(self, tool_calls: Sequence[OllamaMessage.ToolCall]) -> list[Contents]:
        resp: list[Contents] = []
        for tool in tool_calls:
            fcc = FunctionCallContent(
                call_id=tool.function.name,  # Use name of function as call ID since Ollama doesn't provide a call ID
                name=tool.function.name,
                arguments=tool.function.arguments if isinstance(tool.function.arguments, dict) else "",
                raw_representation=tool.function,
            )
            resp.append(fcc)
        return resp

    def _chat_to_tool_spec(self, tools: list[ToolProtocol | MutableMapping[str, Any]]) -> list[dict[str, Any]]:
        chat_tools: list[dict[str, Any]] = []
        for tool in tools:
            if isinstance(tool, ToolProtocol):
                match tool:
                    case AIFunction():
                        chat_tools.append(tool.to_json_schema_spec())
                    case _:
                        raise ServiceInvalidRequestError(
                            f"Unsupported tool type '{type(tool).__name__}' for Ollama client. "
                            "Supported tool types: AIFunction."
                        )
            else:
                chat_tools.append(tool if isinstance(tool, dict) else dict(tool))
        return chat_tools
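

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the client itself). Assumptions: a local
# Ollama server is reachable at the default host and the "llama3.2" model has
# been pulled; the model name and prompt are illustrative only.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        # Construct the client against the assumed local server and model
        client = OllamaChatClient(model_id="llama3.2")
        # get_response is the public entry point inherited from BaseChatClient,
        # which is assumed to drive _inner_get_response above
        response = await client.get_response([ChatMessage(role=Role.USER, text="Why is the sky blue?")])
        print(response.text)

    asyncio.run(_demo())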