@@ -100,32 +100,32 @@ def test_traces(trace_service):
100
100
101
101
attributes = decode_attributes (
102
102
request .resource_spans [0 ].scope_spans [0 ].spans [0 ].attributes )
103
- assert attributes .get (SpanAttributes .LLM_RESPONSE_MODEL ) == model
103
+ assert attributes .get (SpanAttributes .GEN_AI_RESPONSE_MODEL ) == model
104
104
assert attributes .get (
105
- SpanAttributes .LLM_REQUEST_ID ) == outputs [0 ].request_id
105
+ SpanAttributes .GEN_AI_REQUEST_ID ) == outputs [0 ].request_id
106
+ assert attributes .get (SpanAttributes .GEN_AI_REQUEST_TEMPERATURE
107
+ ) == sampling_params .temperature
106
108
assert attributes .get (
107
- SpanAttributes .LLM_REQUEST_TEMPERATURE ) == sampling_params .temperature
109
+ SpanAttributes .GEN_AI_REQUEST_TOP_P ) == sampling_params .top_p
108
110
assert attributes .get (
109
- SpanAttributes .LLM_REQUEST_TOP_P ) == sampling_params .top_p
110
- assert attributes .get (
111
- SpanAttributes .LLM_REQUEST_MAX_TOKENS ) == sampling_params .max_tokens
112
- assert attributes .get (SpanAttributes .LLM_REQUEST_N ) == sampling_params .n
113
- assert attributes .get (SpanAttributes .LLM_USAGE_PROMPT_TOKENS ) == len (
111
+ SpanAttributes .GEN_AI_REQUEST_MAX_TOKENS ) == sampling_params .max_tokens
112
+ assert attributes .get (SpanAttributes .GEN_AI_REQUEST_N ) == sampling_params .n
113
+ assert attributes .get (SpanAttributes .GEN_AI_USAGE_PROMPT_TOKENS ) == len (
114
114
outputs [0 ].prompt_token_ids )
115
115
completion_tokens = sum (len (o .token_ids ) for o in outputs [0 ].outputs )
116
116
assert attributes .get (
117
- SpanAttributes .LLM_USAGE_COMPLETION_TOKENS ) == completion_tokens
117
+ SpanAttributes .GEN_AI_USAGE_COMPLETION_TOKENS ) == completion_tokens
118
118
metrics = outputs [0 ].metrics
119
119
assert attributes .get (
120
- SpanAttributes .LLM_LATENCY_TIME_IN_QUEUE ) == metrics .time_in_queue
120
+ SpanAttributes .GEN_AI_LATENCY_TIME_IN_QUEUE ) == metrics .time_in_queue
121
121
ttft = metrics .first_token_time - metrics .arrival_time
122
122
assert attributes .get (
123
- SpanAttributes .LLM_LATENCY_TIME_TO_FIRST_TOKEN ) == ttft
123
+ SpanAttributes .GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN ) == ttft
124
124
e2e_time = metrics .finished_time - metrics .arrival_time
125
- assert attributes .get (SpanAttributes .LLM_LATENCY_E2E ) == e2e_time
125
+ assert attributes .get (SpanAttributes .GEN_AI_LATENCY_E2E ) == e2e_time
126
126
assert metrics .scheduler_time > 0
127
- assert attributes .get (
128
- SpanAttributes . LLM_LATENCY_TIME_IN_SCHEDULER ) == metrics .scheduler_time
127
+ assert attributes .get (SpanAttributes . GEN_AI_LATENCY_TIME_IN_SCHEDULER
128
+ ) == metrics .scheduler_time
129
129
# Model forward and model execute should be None, since detailed traces are
130
130
# not enabled.
131
131
assert metrics .model_forward_time is None
@@ -166,37 +166,37 @@ def test_traces_with_detailed_steps(trace_service):
166
166
167
167
attributes = decode_attributes (
168
168
request .resource_spans [0 ].scope_spans [0 ].spans [0 ].attributes )
169
- assert attributes .get (SpanAttributes .LLM_RESPONSE_MODEL ) == model
169
+ assert attributes .get (SpanAttributes .GEN_AI_RESPONSE_MODEL ) == model
170
170
assert attributes .get (
171
- SpanAttributes .LLM_REQUEST_ID ) == outputs [0 ].request_id
171
+ SpanAttributes .GEN_AI_REQUEST_ID ) == outputs [0 ].request_id
172
+ assert attributes .get (SpanAttributes .GEN_AI_REQUEST_TEMPERATURE
173
+ ) == sampling_params .temperature
172
174
assert attributes .get (
173
- SpanAttributes .LLM_REQUEST_TEMPERATURE ) == sampling_params .temperature
175
+ SpanAttributes .GEN_AI_REQUEST_TOP_P ) == sampling_params .top_p
174
176
assert attributes .get (
175
- SpanAttributes .LLM_REQUEST_TOP_P ) == sampling_params .top_p
176
- assert attributes .get (
177
- SpanAttributes .LLM_REQUEST_MAX_TOKENS ) == sampling_params .max_tokens
178
- assert attributes .get (SpanAttributes .LLM_REQUEST_N ) == sampling_params .n
179
- assert attributes .get (SpanAttributes .LLM_USAGE_PROMPT_TOKENS ) == len (
177
+ SpanAttributes .GEN_AI_REQUEST_MAX_TOKENS ) == sampling_params .max_tokens
178
+ assert attributes .get (SpanAttributes .GEN_AI_REQUEST_N ) == sampling_params .n
179
+ assert attributes .get (SpanAttributes .GEN_AI_USAGE_PROMPT_TOKENS ) == len (
180
180
outputs [0 ].prompt_token_ids )
181
181
completion_tokens = sum (len (o .token_ids ) for o in outputs [0 ].outputs )
182
182
assert attributes .get (
183
- SpanAttributes .LLM_USAGE_COMPLETION_TOKENS ) == completion_tokens
183
+ SpanAttributes .GEN_AI_USAGE_COMPLETION_TOKENS ) == completion_tokens
184
184
metrics = outputs [0 ].metrics
185
185
assert attributes .get (
186
- SpanAttributes .LLM_LATENCY_TIME_IN_QUEUE ) == metrics .time_in_queue
186
+ SpanAttributes .GEN_AI_LATENCY_TIME_IN_QUEUE ) == metrics .time_in_queue
187
187
ttft = metrics .first_token_time - metrics .arrival_time
188
188
assert attributes .get (
189
- SpanAttributes .LLM_LATENCY_TIME_TO_FIRST_TOKEN ) == ttft
189
+ SpanAttributes .GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN ) == ttft
190
190
e2e_time = metrics .finished_time - metrics .arrival_time
191
- assert attributes .get (SpanAttributes .LLM_LATENCY_E2E ) == e2e_time
191
+ assert attributes .get (SpanAttributes .GEN_AI_LATENCY_E2E ) == e2e_time
192
192
assert metrics .scheduler_time > 0
193
- assert attributes .get (
194
- SpanAttributes . LLM_LATENCY_TIME_IN_SCHEDULER ) == metrics .scheduler_time
193
+ assert attributes .get (SpanAttributes . GEN_AI_LATENCY_TIME_IN_SCHEDULER
194
+ ) == metrics .scheduler_time
195
195
assert metrics .model_forward_time > 0
196
196
assert attributes .get (
197
- SpanAttributes .LLM_LATENCY_TIME_IN_MODEL_FORWARD ) == pytest .approx (
197
+ SpanAttributes .GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD ) == pytest .approx (
198
198
metrics .model_forward_time / 1000 )
199
199
assert metrics .model_execute_time > 0
200
- assert attributes .get (SpanAttributes .LLM_LATENCY_TIME_IN_MODEL_EXECUTE
200
+ assert attributes .get (SpanAttributes .GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE
201
201
) == metrics .model_execute_time
202
202
assert metrics .model_forward_time < 1000 * metrics .model_execute_time
0 commit comments