77from typing import Any , Dict , Iterable , List , Optional , Set , Tuple
88
99import opentelemetry .metrics as metrics_api
10- from opentelemetry .metrics import Counter , Meter
10+ from opentelemetry .metrics import Counter , Histogram , Meter
1111
1212from ..telemetry import metrics_constants as constants
1313from ..types .content import Message
@@ -121,22 +121,34 @@ class ToolMetrics:
121121 error_count : int = 0
122122 total_time : float = 0.0
123123
def add_call(
    self,
    tool: ToolUse,
    duration: float,
    success: bool,
    metrics_client: "MetricsClient",
    attributes: Optional[Dict[str, Any]] = None,
) -> None:
    """Fold one finished tool invocation into this record and emit its metrics.

    The per-tool counters stored on this object are updated, and the same call
    is reported through the instruments exposed by ``metrics_client``.

    Args:
        tool: The tool use that was just executed; kept as the latest tool state.
        duration: Time spent in the call, in seconds.
        success: True when the call succeeded, False when it failed.
        metrics_client: Object exposing the ``tool_call_count``, ``tool_duration``,
            ``tool_success_count`` and ``tool_error_count`` instruments.
        attributes: Optional attributes forwarded unchanged to every instrument call.
    """
    # Outcome-independent bookkeeping.
    self.tool = tool
    self.call_count += 1
    self.total_time += duration

    # Every call is counted and timed, whatever its outcome.
    metrics_client.tool_call_count.add(1, attributes=attributes)
    metrics_client.tool_duration.record(duration, attributes=attributes)

    if not success:
        self.error_count += 1
        metrics_client.tool_error_count.add(1, attributes=attributes)
        return

    self.success_count += 1
    metrics_client.tool_success_count.add(1, attributes=attributes)
140152
141153
142154@dataclass
@@ -159,12 +171,22 @@ class EventLoopMetrics:
159171 accumulated_usage : Usage = field (default_factory = lambda : Usage (inputTokens = 0 , outputTokens = 0 , totalTokens = 0 ))
160172 accumulated_metrics : Metrics = field (default_factory = lambda : Metrics (latencyMs = 0 ))
161173
162- def start_cycle (self ) -> Tuple [float , Trace ]:
@property
def _metrics_client(self) -> "MetricsClient":
    """Return the ``MetricsClient`` used to emit metrics.

    A ``MetricsClient()`` call is made on every access; that class's ``__new__``
    is documented as creating or returning its singleton instance.
    """
    # NOTE(review): calling the class also runs ``__init__`` whenever ``__new__``
    # returns an instance of it -- confirm ``MetricsClient.__init__`` is safe to
    # run repeatedly.
    client = MetricsClient()
    return client
178+
179+ def start_cycle (
180+ self ,
181+ attributes : Optional [Dict [str , Any ]] = None ,
182+ ) -> Tuple [float , Trace ]:
163183 """Start a new event loop cycle and create a trace for it.
164184
165185 Returns:
166186 A tuple containing the start time and the cycle trace object.
167187 """
188+ self ._metrics_client .event_loop_cycle_count .add (1 , attributes = attributes )
189+ self ._metrics_client .event_loop_start_cycle .add (1 , attributes = attributes )
168190 self .cycle_count += 1
169191 start_time = time .time ()
170192 cycle_trace = Trace (f"Cycle { self .cycle_count } " , start_time = start_time )
@@ -177,14 +199,22 @@ def end_cycle(self, start_time: float, cycle_trace: Trace) -> None:
177199 Args:
178200 start_time: The timestamp when the cycle started.
179201 cycle_trace: The trace object for this cycle.
202+ metrics_client: The metrics client for recording the metrics.
180203 """
204+ self ._metrics_client .event_loop_end_cycle .add (1 )
181205 end_time = time .time ()
182206 duration = end_time - start_time
207+ self ._metrics_client .event_loop_cycle_duration .record (duration )
183208 self .cycle_durations .append (duration )
184209 cycle_trace .end (end_time )
185210
186211 def add_tool_usage (
187- self , tool : ToolUse , duration : float , tool_trace : Trace , success : bool , message : Message
212+ self ,
213+ tool : ToolUse ,
214+ duration : float ,
215+ tool_trace : Trace ,
216+ success : bool ,
217+ message : Message ,
188218 ) -> None :
189219 """Record metrics for a tool invocation.
190220
@@ -207,16 +237,27 @@ def add_tool_usage(
207237 tool_trace .raw_name = f"{ tool_name } - { tool_use_id } "
208238 tool_trace .add_message (message )
209239
210- self .tool_metrics .setdefault (tool_name , ToolMetrics (tool )).add_call (tool , duration , success )
211-
240+ self .tool_metrics .setdefault (tool_name , ToolMetrics (tool )).add_call (
241+ tool ,
242+ duration ,
243+ success ,
244+ self ._metrics_client ,
245+ attributes = {
246+ "tool_name" : tool_name ,
247+ "tool_use_id" : tool_use_id ,
248+ },
249+ )
212250 tool_trace .end ()
213251
def update_usage(self, usage: Usage) -> None:
    """Report the token counts in ``usage`` and add them to the running totals.

    The input and output token counts are recorded on the metrics client's
    token histograms; afterwards the input, output and total token counts are
    added to ``accumulated_usage``.

    Args:
        usage: Token usage to fold in. Its ``inputTokens``, ``outputTokens`` and
            ``totalTokens`` entries are read.
    """
    input_histogram = self._metrics_client.event_loop_input_tokens
    input_histogram.record(usage["inputTokens"])
    output_histogram = self._metrics_client.event_loop_output_tokens
    output_histogram.record(usage["outputTokens"])

    totals = self.accumulated_usage
    for key in ("inputTokens", "outputTokens", "totalTokens"):
        totals[key] += usage[key]
@@ -226,7 +267,9 @@ def update_metrics(self, metrics: Metrics) -> None:
226267
227268 Args:
228269 metrics: The metrics data to add to the accumulated totals.
270+ metrics_client: The metrics client for recording the metrics.
229271 """
272+ self ._metrics_client .event_loop_latency .record (metrics ["latencyMs" ])
230273 self .accumulated_metrics ["latencyMs" ] += metrics ["latencyMs" ]
231274
232275 def get_summary (self ) -> Dict [str , Any ]:
@@ -370,7 +413,18 @@ class MetricsClient:
370413
371414 _instance : Optional ["MetricsClient" ] = None
372415 meter : Meter
373- strands_agent_invocation_count : Counter
416+ event_loop_cycle_count : Counter
417+ event_loop_start_cycle : Counter
418+ event_loop_end_cycle : Counter
419+ event_loop_cycle_duration : Histogram
420+ event_loop_latency : Histogram
421+ event_loop_input_tokens : Histogram
422+ event_loop_output_tokens : Histogram
423+
424+ tool_call_count : Counter
425+ tool_success_count : Counter
426+ tool_error_count : Counter
427+ tool_duration : Histogram
374428
375429 def __new__ (cls ) -> "MetricsClient" :
376430 """Create or return the singleton instance of MetricsClient.
@@ -398,6 +452,24 @@ def __init__(self) -> None:
398452
def create_instruments(self) -> None:
    """Build every OpenTelemetry instrument this client exposes.

    Each instrument is created from ``self.meter`` under the name held in the
    matching ``constants`` entry and stored as an attribute on this client.
    Counters use the unit ``"Count"``; histograms use ``"s"``, ``"ms"`` or
    ``"token"`` as listed below.
    """
    meter = self.meter

    # Event loop cycle counters and timings.
    self.event_loop_cycle_count = meter.create_counter(
        name=constants.STRANDS_EVENT_LOOP_CYCLE_COUNT,
        unit="Count",
    )
    self.event_loop_start_cycle = meter.create_counter(
        name=constants.STRANDS_EVENT_LOOP_START_CYCLE,
        unit="Count",
    )
    self.event_loop_end_cycle = meter.create_counter(
        name=constants.STRANDS_EVENT_LOOP_END_CYCLE,
        unit="Count",
    )
    self.event_loop_cycle_duration = meter.create_histogram(
        name=constants.STRANDS_EVENT_LOOP_CYCLE_DURATION,
        unit="s",
    )
    self.event_loop_latency = meter.create_histogram(
        name=constants.STRANDS_EVENT_LOOP_LATENCY,
        unit="ms",
    )

    # Tool call outcome counters and call duration.
    self.tool_call_count = meter.create_counter(
        name=constants.STRANDS_TOOL_CALL_COUNT,
        unit="Count",
    )
    self.tool_success_count = meter.create_counter(
        name=constants.STRANDS_TOOL_SUCCESS_COUNT,
        unit="Count",
    )
    self.tool_error_count = meter.create_counter(
        name=constants.STRANDS_TOOL_ERROR_COUNT,
        unit="Count",
    )
    self.tool_duration = meter.create_histogram(
        name=constants.STRANDS_TOOL_DURATION,
        unit="s",
    )

    # Token usage histograms.
    self.event_loop_input_tokens = meter.create_histogram(
        name=constants.STRANDS_EVENT_LOOP_INPUT_TOKENS,
        unit="token",
    )
    self.event_loop_output_tokens = meter.create_histogram(
        name=constants.STRANDS_EVENT_LOOP_OUTPUT_TOKENS,
        unit="token",
    )
0 commit comments