Skip to content

Commit c4d0384

Browse files
authored
Do not trim span descriptions. (#1983)
- Made sure that span descriptions are never trimmed (for all op values, not just db spans). - Removed the experimental smart_transaction_trimming option. - Also removed some dead code that was never executed because the experimental option defaults to False.
1 parent 5d9cd4f commit c4d0384

File tree

4 files changed

+31
-148
lines changed

4 files changed

+31
-148
lines changed

sentry_sdk/client.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -320,12 +320,7 @@ def _prepare_event(
320320
# Postprocess the event here so that annotated types do
321321
# generally not surface in before_send
322322
if event is not None:
323-
event = serialize(
324-
event,
325-
smart_transaction_trimming=self.options["_experiments"].get(
326-
"smart_transaction_trimming"
327-
),
328-
)
323+
event = serialize(event)
329324

330325
before_send = self.options["before_send"]
331326
if before_send is not None and event.get("type") != "transaction":

sentry_sdk/consts.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
{
3434
"max_spans": Optional[int],
3535
"record_sql_params": Optional[bool],
36-
"smart_transaction_trimming": Optional[bool],
37-
# TODO: Remvoe these 2 profiling related experiments
36+
# TODO: Remove these 2 profiling related experiments
3837
"profiles_sample_rate": Optional[float],
3938
"profiler_mode": Optional[ProfilerMode],
4039
},

sentry_sdk/serializer.py

Lines changed: 15 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
capture_internal_exception,
99
disable_capture_event,
1010
format_timestamp,
11-
json_dumps,
1211
safe_repr,
1312
strip_string,
1413
)
15-
16-
import sentry_sdk.utils
17-
1814
from sentry_sdk._compat import (
1915
text_type,
2016
PY2,
@@ -23,12 +19,9 @@
2319
iteritems,
2420
binary_sequence_types,
2521
)
26-
2722
from sentry_sdk._types import TYPE_CHECKING
2823

2924
if TYPE_CHECKING:
30-
from datetime import timedelta
31-
3225
from types import TracebackType
3326

3427
from typing import Any
@@ -37,7 +30,6 @@
3730
from typing import Dict
3831
from typing import List
3932
from typing import Optional
40-
from typing import Tuple
4133
from typing import Type
4234
from typing import Union
4335

@@ -120,12 +112,11 @@ def __exit__(
120112
self._ids.pop(id(self._objs.pop()), None)
121113

122114

123-
def serialize(event, smart_transaction_trimming=False, **kwargs):
124-
# type: (Event, bool, **Any) -> Event
115+
def serialize(event, **kwargs):
116+
# type: (Event, **Any) -> Event
125117
memo = Memo()
126118
path = [] # type: List[Segment]
127119
meta_stack = [] # type: List[Dict[str, Any]]
128-
span_description_bytes = [] # type: List[int]
129120

130121
def _annotate(**meta):
131122
# type: (**Any) -> None
@@ -365,113 +356,23 @@ def _serialize_node_impl(
365356
if not isinstance(obj, string_types):
366357
obj = safe_repr(obj)
367358

368-
# Allow span descriptions to be longer than other strings.
369-
#
370-
# For database auto-instrumented spans, the description contains
371-
# potentially long SQL queries that are most useful when not truncated.
372-
# Because arbitrarily large events may be discarded by the server as a
373-
# protection mechanism, we dynamically limit the description length
374-
# later in _truncate_span_descriptions.
375-
if (
376-
smart_transaction_trimming
377-
and len(path) == 3
378-
and path[0] == "spans"
379-
and path[-1] == "description"
380-
):
381-
span_description_bytes.append(len(obj))
359+
is_span_description = (
360+
len(path) == 3 and path[0] == "spans" and path[-1] == "description"
361+
)
362+
if is_span_description:
382363
return obj
383-
return _flatten_annotated(strip_string(obj))
384364

385-
def _truncate_span_descriptions(serialized_event, event, excess_bytes):
386-
# type: (Event, Event, int) -> None
387-
"""
388-
Modifies serialized_event in-place trying to remove excess_bytes from
389-
span descriptions. The original event is used read-only to access the
390-
span timestamps (represented as RFC3399-formatted strings in
391-
serialized_event).
392-
393-
It uses heuristics to prioritize preserving the description of spans
394-
that might be the most interesting ones in terms of understanding and
395-
optimizing performance.
396-
"""
397-
# When truncating a description, preserve a small prefix.
398-
min_length = 10
399-
400-
def shortest_duration_longest_description_first(args):
401-
# type: (Tuple[int, Span]) -> Tuple[timedelta, int]
402-
i, serialized_span = args
403-
span = event["spans"][i]
404-
now = datetime.utcnow()
405-
start = span.get("start_timestamp") or now
406-
end = span.get("timestamp") or now
407-
duration = end - start
408-
description = serialized_span.get("description") or ""
409-
return (duration, -len(description))
410-
411-
# Note: for simplicity we sort spans by exact duration and description
412-
# length. If ever needed, we could have a more involved heuristic, e.g.
413-
# replacing exact durations with "buckets" and/or looking at other span
414-
# properties.
415-
path.append("spans")
416-
for i, span in sorted(
417-
enumerate(serialized_event.get("spans") or []),
418-
key=shortest_duration_longest_description_first,
419-
):
420-
description = span.get("description") or ""
421-
if len(description) <= min_length:
422-
continue
423-
excess_bytes -= len(description) - min_length
424-
path.extend([i, "description"])
425-
# Note: the last time we call strip_string we could preserve a few
426-
# more bytes up to a total length of MAX_EVENT_BYTES. Since that's
427-
# not strictly required, we leave it out for now for simplicity.
428-
span["description"] = _flatten_annotated(
429-
strip_string(description, max_length=min_length)
430-
)
431-
del path[-2:]
432-
del meta_stack[len(path) + 1 :]
433-
434-
if excess_bytes <= 0:
435-
break
436-
path.pop()
437-
del meta_stack[len(path) + 1 :]
365+
return _flatten_annotated(strip_string(obj))
438366

367+
#
368+
# Start of serialize() function
369+
#
439370
disable_capture_event.set(True)
440371
try:
441-
rv = _serialize_node(event, **kwargs)
442-
if meta_stack and isinstance(rv, dict):
443-
rv["_meta"] = meta_stack[0]
444-
445-
sum_span_description_bytes = sum(span_description_bytes)
446-
if smart_transaction_trimming and sum_span_description_bytes > 0:
447-
span_count = len(event.get("spans") or [])
448-
# This is an upper bound of how many bytes all descriptions would
449-
# consume if the usual string truncation in _serialize_node_impl
450-
# would have taken place, not accounting for the metadata attached
451-
# as event["_meta"].
452-
descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH
453-
454-
# If by not truncating descriptions we ended up with more bytes than
455-
# per the usual string truncation, check if the event is too large
456-
# and we need to truncate some descriptions.
457-
#
458-
# This is guarded with an if statement to avoid JSON-encoding the
459-
# event unnecessarily.
460-
if sum_span_description_bytes > descriptions_budget_bytes:
461-
original_bytes = len(json_dumps(rv))
462-
excess_bytes = original_bytes - MAX_EVENT_BYTES
463-
if excess_bytes > 0:
464-
# Event is too large, will likely be discarded by the
465-
# server. Trim it down before sending.
466-
_truncate_span_descriptions(rv, event, excess_bytes)
467-
468-
# Span descriptions truncated, set or reset _meta.
469-
#
470-
# We run the same code earlier because we want to account
471-
# for _meta when calculating original_bytes, the number of
472-
# bytes in the JSON-encoded event.
473-
if meta_stack and isinstance(rv, dict):
474-
rv["_meta"] = meta_stack[0]
475-
return rv
372+
serialized_event = _serialize_node(event, **kwargs)
373+
if meta_stack and isinstance(serialized_event, dict):
374+
serialized_event["_meta"] = meta_stack[0]
375+
376+
return serialized_event
476377
finally:
477378
disable_capture_event.set(False)

tests/integrations/sqlalchemy/test_sqlalchemy.py

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
from sentry_sdk import capture_message, start_transaction, configure_scope
1010
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
11-
from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH
1211
from sentry_sdk.serializer import MAX_EVENT_BYTES
12+
from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH
1313

1414

1515
def test_orm_queries(sentry_init, capture_events):
@@ -143,7 +143,6 @@ def test_long_sql_query_preserved(sentry_init, capture_events):
143143
sentry_init(
144144
traces_sample_rate=1,
145145
integrations=[SqlalchemyIntegration()],
146-
_experiments={"smart_transaction_trimming": True},
147146
)
148147
events = capture_events()
149148

@@ -158,11 +157,10 @@ def test_long_sql_query_preserved(sentry_init, capture_events):
158157
assert description.endswith("SELECT 98 UNION SELECT 99")
159158

160159

161-
def test_too_large_event_truncated(sentry_init, capture_events):
160+
def test_large_event_not_truncated(sentry_init, capture_events):
162161
sentry_init(
163162
traces_sample_rate=1,
164163
integrations=[SqlalchemyIntegration()],
165-
_experiments={"smart_transaction_trimming": True},
166164
)
167165
events = capture_events()
168166

@@ -178,36 +176,26 @@ def processor(event, hint):
178176
engine = create_engine("sqlite:///:memory:")
179177
with start_transaction(name="test"):
180178
with engine.connect() as con:
181-
for _ in range(2000):
179+
for _ in range(1500):
182180
con.execute(" UNION ".join("SELECT {}".format(i) for i in range(100)))
183181

184182
(event,) = events
185183

186-
# Because of attached metadata in the "_meta" key, we may send out a little
187-
# bit more than MAX_EVENT_BYTES.
188-
max_bytes = 1.2 * MAX_EVENT_BYTES
189-
assert len(json_dumps(event)) < max_bytes
184+
assert len(json_dumps(event)) > MAX_EVENT_BYTES
190185

191186
# Some spans are discarded.
192187
assert len(event["spans"]) == 1000
193188

194-
for i, span in enumerate(event["spans"]):
195-
description = span["description"]
196-
197-
assert description.startswith("SELECT ")
198-
if str(i) in event["_meta"]["spans"]:
199-
# Description must have been truncated
200-
assert len(description) == 10
201-
assert description.endswith("...")
202-
else:
203-
# Description was not truncated, check for original length
204-
assert len(description) == 1583
205-
assert description.endswith("SELECT 98 UNION SELECT 99")
206-
207-
# Smoke check the meta info for one of the spans.
208-
assert next(iter(event["_meta"]["spans"].values())) == {
209-
"description": {"": {"len": 1583, "rem": [["!limit", "x", 7, 10]]}}
210-
}
189+
# Span descriptions are not truncated.
190+
description = event["spans"][0]["description"]
191+
assert len(description) == 1583
192+
assert description.startswith("SELECT 0")
193+
assert description.endswith("SELECT 98 UNION SELECT 99")
194+
195+
description = event["spans"][999]["description"]
196+
assert len(description) == 1583
197+
assert description.startswith("SELECT 0")
198+
assert description.endswith("SELECT 98 UNION SELECT 99")
211199

212200
# Smoke check that truncation of other fields has not changed.
213201
assert len(event["message"]) == MAX_STRING_LENGTH

0 commit comments

Comments
 (0)