26 changes: 25 additions & 1 deletion libs/oci/README.md
@@ -62,7 +62,7 @@ embeddings.embed_query("What is the meaning of life?")
```

### 4. Use Structured Output
`ChatOCIGenAI` supports structured output.

<sub>**Note:** The default method is `function_calling`. If the default method returns `None` (e.g. for Gemini models), try `json_schema` or `json_mode`, as in the sketch below.</sub>

@@ -79,6 +79,30 @@ structured_llm = llm.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about programming")
```
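
A minimal sketch of switching methods, reusing the `llm` and `Joke` objects from the example above and the `method` argument described in the note (accepted values: `function_calling`, `json_schema`, `json_mode`):

```python
# Fall back to JSON-schema based structured output when function calling returns None.
structured_llm = llm.with_structured_output(Joke, method="json_schema")
structured_llm.invoke("Tell me a joke about programming")
```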

### 5. Use Parallel Tool Calling
Enable parallel tool calling so the model can request multiple tool calls in a single response, improving performance for multi-tool workflows.

```python
from langchain_oci import ChatOCIGenAI

# Option 1: Set at class level for all tool bindings
llm = ChatOCIGenAI(
    model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",  # Llama 4+, xAI Grok, OpenAI, Mistral
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="MY_COMPARTMENT_ID",
    parallel_tool_calls=True  # Enable parallel tool calling
)

# Option 2: Set per-binding
llm = ChatOCIGenAI(model_id="xai.grok-4-fast")  # Example with Grok
llm_with_tools = llm.bind_tools(
    [get_weather, calculate_tip, get_population],
    parallel_tool_calls=True  # Tools can execute simultaneously
)
```

<sub>**Note:** Parallel tool calling is supported for models using GenericChatRequest (Meta Llama 4+, xAI Grok, OpenAI, Mistral). Llama 3.x and Cohere models will raise an error if this parameter is used.</sub>
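
When the model does issue multiple calls at once, they arrive together on the response message. A minimal sketch of reading them, assuming the hypothetical `llm_with_tools`, `get_weather`, `calculate_tip`, and `get_population` objects from Option 2 above:

```python
# With parallel_tool_calls=True the model may return several tool calls in one response.
response = llm_with_tools.invoke(
    "What's the weather in Chicago and the population of Illinois?"
)
for tool_call in response.tool_calls:
    print(tool_call["name"], tool_call["args"])
```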


## OCI Data Science Model Deployment Examples

141 changes: 117 additions & 24 deletions libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -247,8 +247,13 @@ def chat_generation_info(self, response: Any) -> Dict[str, Any]:
}

# Include token usage if available
if hasattr(response.data.chat_response, "usage") and response.data.chat_response.usage:
generation_info["total_tokens"] = response.data.chat_response.usage.total_tokens
if (
hasattr(response.data.chat_response, "usage")
and response.data.chat_response.usage
):
generation_info["total_tokens"] = (
response.data.chat_response.usage.total_tokens
)

# Include tool calls if available
if self.chat_tool_calls(response):
@@ -342,6 +347,14 @@ def messages_to_oci_params(

This includes conversion of chat history and tool call results.
"""
# Cohere models don't support parallel tool calls
if kwargs.get("is_parallel_tool_calls"):
raise ValueError(
"Parallel tool calls are not supported for Cohere models. "
"This feature is only available for models using GenericChatRequest "
"(Meta, Llama, xAI Grok, OpenAI, Mistral)."
)

is_force_single_step = kwargs.get("is_force_single_step", False)
oci_chat_history = []

@@ -622,9 +635,14 @@ def chat_generation_info(self, response: Any) -> Dict[str, Any]:
}

# Include token usage if available
if hasattr(response.data.chat_response, "usage") and response.data.chat_response.usage:
generation_info["total_tokens"] = response.data.chat_response.usage.total_tokens

if (
hasattr(response.data.chat_response, "usage")
and response.data.chat_response.usage
):
generation_info["total_tokens"] = (
response.data.chat_response.usage.total_tokens
)

if self.chat_tool_calls(response):
generation_info["tool_calls"] = self.format_response_tool_calls(
self.chat_tool_calls(response)
@@ -770,8 +788,7 @@ def messages_to_oci_params(
# continue calling tools even after receiving results.

def _should_allow_more_tool_calls(
messages: List[BaseMessage],
max_tool_calls: int
messages: List[BaseMessage], max_tool_calls: int
) -> bool:
"""
Determine if the model should be allowed to call more tools.
@@ -787,10 +804,7 @@ def _should_allow_more_tool_calls(
max_tool_calls: Maximum number of tool calls before forcing stop
"""
# Count total tool calls made so far
tool_call_count = sum(
1 for msg in messages
if isinstance(msg, ToolMessage)
)
tool_call_count = sum(1 for msg in messages if isinstance(msg, ToolMessage))

# Safety limit: prevent runaway tool calling
if tool_call_count >= max_tool_calls:
@@ -799,12 +813,12 @@ def _should_allow_more_tool_calls(
# Detect infinite loop: same tool called with same arguments in succession
recent_calls = []
for msg in reversed(messages):
if hasattr(msg, 'tool_calls') and msg.tool_calls:
if hasattr(msg, "tool_calls") and msg.tool_calls:
for tc in msg.tool_calls:
# Create signature: (tool_name, sorted_args)
try:
args_str = json.dumps(tc.get('args', {}), sort_keys=True)
signature = (tc.get('name', ''), args_str)
args_str = json.dumps(tc.get("args", {}), sort_keys=True)
signature = (tc.get("name", ""), args_str)

# Check if this exact call was made in last 2 calls
if signature in recent_calls[-2:]:
@@ -829,6 +843,10 @@ def _should_allow_more_tool_calls(
result["tool_choice"] = self.oci_tool_choice_none()
# else: Allow model to decide (default behavior)

# Add parallel tool calls support (GenericChatRequest models)
if "is_parallel_tool_calls" in kwargs:
result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"]

return result

def _process_message_content(
Expand Down Expand Up @@ -1142,9 +1160,7 @@ def _prepare_request(
) from ex

oci_params = self._provider.messages_to_oci_params(
messages,
max_sequential_tool_calls=self.max_sequential_tool_calls,
**kwargs
messages, max_sequential_tool_calls=self.max_sequential_tool_calls, **kwargs
)

oci_params["is_stream"] = stream
@@ -1154,12 +1170,17 @@ def _prepare_request(
_model_kwargs[self._provider.stop_sequence_key] = stop

# Warn if using max_tokens with OpenAI models
if self.model_id and self.model_id.startswith("openai.") and "max_tokens" in _model_kwargs:
if (
self.model_id
and self.model_id.startswith("openai.")
and "max_tokens" in _model_kwargs
):
import warnings

warnings.warn(
f"OpenAI models require 'max_completion_tokens' instead of 'max_tokens'.",
UserWarning,
stacklevel=2
stacklevel=2,
)

chat_params = {**_model_kwargs, **kwargs, **oci_params}
@@ -1179,13 +1200,57 @@ def _prepare_request(

return request

def _supports_parallel_tool_calls(self, model_id: str) -> bool:
"""Check if the model supports parallel tool calling.

Parallel tool calling is supported for:
- Llama 4+ only (tested and verified)
- Other GenericChatRequest models (xAI Grok, OpenAI, Mistral)

Not supported for:
- All Llama 3.x versions (3.0, 3.1, 3.2, 3.3)
- Cohere models

Args:
model_id: The model identifier (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8")

Returns:
bool: True if model supports parallel tool calling, False otherwise
"""
import re

# Extract provider from model_id (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
provider = model_id.split(".")[0].lower()

# Cohere models don't support parallel tool calling
if provider == "cohere":
return False

# For Meta/Llama models, check version
if provider == "meta" and "llama" in model_id.lower():
# Extract version number (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
version_match = re.search(r"llama-(\d+)", model_id.lower())
if version_match:
major = int(version_match.group(1))

# Only Llama 4+ supports parallel tool calling
# Llama 3.x (including 3.3) does NOT support it based on testing
if major >= 4:
return True

return False

# Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
return True

def bind_tools(
self,
tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
*,
tool_choice: Optional[
Union[dict, str, Literal["auto", "none", "required", "any"], bool]
] = None,
parallel_tool_calls: Optional[bool] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
@@ -1206,6 +1271,12 @@ def bind_tools(
{"type": "function", "function": {"name": <<tool_name>>}}:
calls <<tool_name>> tool.
- False or None: no effect, default Meta behavior.
parallel_tool_calls: Whether to enable parallel function calling.
If True, the model can call multiple tools simultaneously.
If False, tools are called sequentially.
If None (default), uses the class-level parallel_tool_calls setting.
Supported for Meta Llama 4+ and other models using GenericChatRequest
(xAI Grok, OpenAI, Mistral). Not supported for Llama 3.x or Cohere models.
kwargs: Any additional parameters are passed directly to
:meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`.
"""
@@ -1215,6 +1286,28 @@ def bind_tools(
if tool_choice is not None:
kwargs["tool_choice"] = self._provider.process_tool_choice(tool_choice)

# Add parallel tool calls support
# Use bind-time parameter if provided, else fall back to class default
use_parallel = (
parallel_tool_calls
if parallel_tool_calls is not None
else self.parallel_tool_calls
)
if use_parallel:
# Validate model supports parallel tool calling
if not self._supports_parallel_tool_calls(self.model_id):
if "llama" in self.model_id.lower():
raise ValueError(
f"Parallel tool calls are not supported for {self.model_id}. "
"This feature is only available for Llama 4+ models. "
"Llama 3.x models (including 3.3) do not support parallel tool calling."
)
else:
raise ValueError(
f"Parallel tool calls are not supported for {self.model_id}."
)
kwargs["is_parallel_tool_calls"] = True

return super().bind(tools=formatted_tools, **kwargs)

def with_structured_output(
@@ -1244,7 +1337,7 @@ def with_structured_output(
used. Note that if using "json_mode" then you must include instructions
for formatting the output into the desired schema into the model call.
If "json_schema" then it allows the user to pass a json schema (or pydantic)
to the model for structured output.
include_raw:
If False then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If True
@@ -1300,18 +1393,18 @@ def with_structured_output(
if is_pydantic_schema
else schema
)

response_json_schema = self._provider.oci_response_json_schema(
name=json_schema_dict.get("title", "response"),
description=json_schema_dict.get("description", ""),
schema=json_schema_dict,
is_strict=True
is_strict=True,
)

response_format_obj = self._provider.oci_json_schema_response_format(
json_schema=response_json_schema
)

llm = self.bind(response_format=response_format_obj)
if is_pydantic_schema:
output_parser = PydanticOutputParser(pydantic_object=schema)
7 changes: 7 additions & 0 deletions libs/oci/langchain_oci/llms/oci_generative_ai.py
@@ -120,6 +120,13 @@ class OCIGenAIBase(BaseModel, ABC):
"""Maximum tool calls before forcing final answer.
Prevents infinite loops while allowing multi-step orchestration."""

parallel_tool_calls: bool = False
"""Whether to enable parallel function calling during tool use.
If True, the model can call multiple tools simultaneously.
Supported for Meta Llama 4+ and other models using GenericChatRequest (xAI Grok, OpenAI, Mistral).
Not supported for Llama 3.x or Cohere models.
Default: False for backward compatibility."""

model_config = ConfigDict(
extra="forbid", arbitrary_types_allowed=True, protected_namespaces=()
)