Skip to content

Commit 45731d3

Browse files
authored
Merge branch '3.x-staging' into munir/upgrade-min-aws-lambda
2 parents 50ad4a4 + 5b4ffa6 commit 45731d3

File tree

8 files changed

+39
-139
lines changed

8 files changed

+39
-139
lines changed

.github/workflows/check_safe_main_merge.yml

Lines changed: 0 additions & 27 deletions
This file was deleted.

.riot/requirements/16562eb.txt

Lines changed: 0 additions & 32 deletions
This file was deleted.

.riot/requirements/df60af6.txt

Lines changed: 0 additions & 52 deletions
This file was deleted.

ddtrace/llmobs/_evaluators/runner.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ddtrace.internal import forksafe
66
from ddtrace.internal.logger import get_logger
77
from ddtrace.internal.periodic import PeriodicService
8+
from ddtrace.internal.service import ServiceStatus
89
from ddtrace.internal.telemetry import telemetry_writer
910
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE
1011
from ddtrace.llmobs._evaluators.ragas.answer_relevancy import RagasAnswerRelevancyEvaluator
@@ -94,6 +95,8 @@ def recreate(self) -> "EvaluatorRunner":
9495
)
9596

9697
def enqueue(self, span_event: Dict, span: Span) -> None:
98+
if self.status == ServiceStatus.STOPPED:
99+
return
97100
with self._lock:
98101
if len(self._buffer) >= self._buffer_limit:
99102
logger.warning(
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
LLM Observability: This fix resolves an issue where spans were being enqueued to an inactive evaluator runner, which caused noisy logs
5+
related to the evaluator runner buffer being full.

riotfile.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2492,13 +2492,6 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT
24922492
"pytest-randomly": latest,
24932493
},
24942494
venvs=[
2495-
Venv(
2496-
pys="3.7",
2497-
pkgs={
2498-
"openai[datalib]": "==1.30.1",
2499-
"pillow": "==9.5.0",
2500-
},
2501-
),
25022495
Venv(
25032496
pys=select_pys(min_version="3.8", max_version="3.11"),
25042497
pkgs={
@@ -2927,11 +2920,13 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT
29272920
Venv(
29282921
name="llmobs",
29292922
command="pytest {cmdargs} tests/llmobs",
2930-
pkgs={"vcrpy": latest, "pytest-asyncio": "==0.21.1"},
2931-
venvs=[
2932-
Venv(pys="3.7"),
2933-
Venv(pys=select_pys(min_version="3.8"), pkgs={"ragas": "==0.1.21", "langchain": latest}),
2934-
],
2923+
pkgs={
2924+
"vcrpy": latest,
2925+
"pytest-asyncio": "==0.21.1",
2926+
"ragas": "==0.1.21",
2927+
"langchain": latest,
2928+
},
2929+
pys=select_pys(min_version="3.8"),
29352930
),
29362931
Venv(
29372932
name="profile",

tests/llmobs/test_llmobs_evaluator_runner.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,39 +18,44 @@
1818
DUMMY_SPAN = Span("dummy_span")
1919

2020

21-
def test_evaluator_runner_start(mock_evaluator_logs):
22-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock())
23-
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=mock.MagicMock()))
21+
@pytest.fixture
22+
def active_evaluator_runner(llmobs):
23+
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=llmobs)
24+
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=llmobs))
2425
evaluator_runner.start()
26+
yield evaluator_runner
27+
28+
29+
def test_evaluator_runner_start(mock_evaluator_logs, active_evaluator_runner):
2530
mock_evaluator_logs.debug.assert_has_calls([mock.call("started %r", "EvaluatorRunner")])
2631

2732

28-
def test_evaluator_runner_buffer_limit(mock_evaluator_logs):
29-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock())
33+
def test_evaluator_runner_buffer_limit(mock_evaluator_logs, active_evaluator_runner):
3034
for _ in range(1001):
31-
evaluator_runner.enqueue({}, DUMMY_SPAN)
35+
active_evaluator_runner.enqueue({}, DUMMY_SPAN)
3236
mock_evaluator_logs.warning.assert_called_with(
3337
"%r event buffer full (limit is %d), dropping event", "EvaluatorRunner", 1000
3438
)
3539

3640

37-
def test_evaluator_runner_periodic_enqueues_eval_metric(llmobs, mock_llmobs_eval_metric_writer):
38-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=llmobs)
39-
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=llmobs))
40-
evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
41-
evaluator_runner.periodic()
41+
def test_evaluator_runner_periodic_enqueues_eval_metric(mock_llmobs_eval_metric_writer, active_evaluator_runner):
42+
active_evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
43+
active_evaluator_runner.periodic()
4244
mock_llmobs_eval_metric_writer.enqueue.assert_called_once_with(
4345
_dummy_evaluator_eval_metric_event(span_id="123", trace_id="1234")
4446
)
4547

4648

47-
@pytest.mark.vcr_logs
48-
def test_evaluator_runner_timed_enqueues_eval_metric(llmobs, mock_llmobs_eval_metric_writer):
49-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=llmobs)
50-
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=llmobs))
49+
def test_evaluator_runner_stopped_does_not_enqueue_metric(llmobs, mock_llmobs_eval_metric_writer):
50+
evaluator_runner = EvaluatorRunner(interval=0.1, llmobs_service=llmobs)
5151
evaluator_runner.start()
52-
5352
evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
53+
assert not evaluator_runner._buffer
54+
assert mock_llmobs_eval_metric_writer.enqueue.call_count == 0
55+
56+
57+
def test_evaluator_runner_timed_enqueues_eval_metric(llmobs, mock_llmobs_eval_metric_writer, active_evaluator_runner):
58+
active_evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
5459

5560
time.sleep(0.1)
5661

tests/llmobs/test_llmobs_service.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,7 @@ def test_llmobs_fork_recreates_and_restarts_eval_metric_writer():
13831383

13841384
def test_llmobs_fork_recreates_and_restarts_evaluator_runner(mock_ragas_evaluator):
13851385
"""Test that forking a process correctly recreates and restarts the EvaluatorRunner."""
1386+
pytest.importorskip("ragas")
13861387
with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")):
13871388
with mock.patch("ddtrace.llmobs._evaluators.runner.EvaluatorRunner.periodic"):
13881389
llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app")
@@ -1464,6 +1465,8 @@ def test_llmobs_fork_submit_evaluation(monkeypatch):
14641465
def test_llmobs_fork_evaluator_runner_run(monkeypatch):
14651466
"""Test that forking a process correctly encodes new spans created in each process."""
14661467
monkeypatch.setenv("_DD_LLMOBS_EVALUATOR_INTERVAL", 5.0)
1468+
pytest.importorskip("ragas")
1469+
monkeypatch.setenv("_DD_LLMOBS_EVALUATORS", "ragas_faithfulness")
14671470
with mock.patch("ddtrace.llmobs._evaluators.runner.EvaluatorRunner.periodic"):
14681471
llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app", api_key="test_api_key")
14691472
pid = os.fork()

0 commit comments

Comments
 (0)