@@ -14,6 +14,8 @@
 
 from ..types.content import ContentBlock, Messages
 from ..types.models.openai import OpenAIModel
+from ..types.streaming import StreamEvent
+from ..types.tools import ToolSpec
 
 logger = logging.getLogger(__name__)
 
@@ -104,19 +106,29 @@ def format_request_message_content(cls, content: ContentBlock) -> dict[str, Any]
         return super().format_request_message_content(content)
 
     @override
-    async def stream(self, request: dict[str, Any]) -> AsyncGenerator[dict[str, Any], None]:
-        """Send the request to the LiteLLM model and get the streaming response.
+    async def stream(
+        self, messages: Messages, tool_specs: Optional[list[ToolSpec]] = None, system_prompt: Optional[str] = None
+    ) -> AsyncGenerator[StreamEvent, None]:
+        """Stream conversation with the LiteLLM model.
 
         Args:
-            request: The formatted request to send to the LiteLLM model.
+            messages: List of message objects to be processed by the model.
+            tool_specs: List of tool specifications to make available to the model.
+            system_prompt: System prompt to provide context to the model.
 
-        Returns:
-            An iterable of response events from the LiteLLM model.
+        Yields:
+            Formatted message chunks from the model.
         """
+        logger.debug("formatting request")
+        request = self.format_request(messages, tool_specs, system_prompt)
+        logger.debug("formatted request=<%s>", request)
+
+        logger.debug("invoking model")
         response = self.client.chat.completions.create(**request)
 
-        yield {"chunk_type": "message_start"}
-        yield {"chunk_type": "content_start", "data_type": "text"}
+        logger.debug("got response from model")
+        yield self.format_chunk({"chunk_type": "message_start"})
+        yield self.format_chunk({"chunk_type": "content_start", "data_type": "text"})
 
         tool_calls: dict[int, list[Any]] = {}
 
@@ -127,38 +139,44 @@ async def stream(self, request: dict[str, Any]) -> AsyncGenerator[dict[str, Any]
             choice = event.choices[0]
 
             if choice.delta.content:
-                yield {"chunk_type": "content_delta", "data_type": "text", "data": choice.delta.content}
+                yield self.format_chunk(
+                    {"chunk_type": "content_delta", "data_type": "text", "data": choice.delta.content}
+                )
 
             if hasattr(choice.delta, "reasoning_content") and choice.delta.reasoning_content:
-                yield {
-                    "chunk_type": "content_delta",
-                    "data_type": "reasoning_content",
-                    "data": choice.delta.reasoning_content,
-                }
+                yield self.format_chunk(
+                    {
+                        "chunk_type": "content_delta",
+                        "data_type": "reasoning_content",
+                        "data": choice.delta.reasoning_content,
+                    }
+                )
 
             for tool_call in choice.delta.tool_calls or []:
                 tool_calls.setdefault(tool_call.index, []).append(tool_call)
 
             if choice.finish_reason:
                 break
 
-        yield {"chunk_type": "content_stop", "data_type": "text"}
+        yield self.format_chunk({"chunk_type": "content_stop", "data_type": "text"})
 
         for tool_deltas in tool_calls.values():
-            yield {"chunk_type": "content_start", "data_type": "tool", "data": tool_deltas[0]}
+            yield self.format_chunk({"chunk_type": "content_start", "data_type": "tool", "data": tool_deltas[0]})
 
             for tool_delta in tool_deltas:
-                yield {"chunk_type": "content_delta", "data_type": "tool", "data": tool_delta}
+                yield self.format_chunk({"chunk_type": "content_delta", "data_type": "tool", "data": tool_delta})
 
-            yield {"chunk_type": "content_stop", "data_type": "tool"}
+            yield self.format_chunk({"chunk_type": "content_stop", "data_type": "tool"})
 
-        yield {"chunk_type": "message_stop", "data": choice.finish_reason}
+        yield self.format_chunk({"chunk_type": "message_stop", "data": choice.finish_reason})
 
         # Skip remaining events as we don't have use for anything except the final usage payload
         for event in response:
             _ = event
 
-        yield {"chunk_type": "metadata", "data": event.usage}
+        yield self.format_chunk({"chunk_type": "metadata", "data": event.usage})
+
+        logger.debug("finished streaming response from model")
 
     @override
     async def structured_output(
@@ -178,7 +196,7 @@ async def structured_output(
         # completions() has a method `create()` which wraps the real completion API of Litellm
         response = self.client.chat.completions.create(
             model=self.get_config()["model_id"],
-            messages=super().format_request(prompt)["messages"],
+            messages=self.format_request(prompt)["messages"],
            response_format=output_model,
         )
 
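For reference, a minimal sketch of how a caller might drive the new stream() signature after this change. This is an illustration under assumptions, not code from the diff: the import path strands.models.litellm and the model_id value are assumed, and the printed events are simply whatever format_chunk() emits.

# Hypothetical usage sketch of the updated stream() API; the import path
# and model_id below are assumptions, not part of this diff.
import asyncio

from strands.models.litellm import LiteLLMModel


async def main() -> None:
    model = LiteLLMModel(model_id="openai/gpt-4o")

    # Messages follow the SDK's content-block shape.
    messages = [{"role": "user", "content": [{"text": "Hello"}]}]

    # stream() now formats the request and the chunks internally, so the
    # caller just iterates over ready-to-use StreamEvent dicts instead of
    # calling format_request()/format_chunk() itself.
    async for event in model.stream(messages, tool_specs=None, system_prompt="Be concise."):
        print(event)


asyncio.run(main())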