From fe8ff06e6e7460fcc68a549856ade59a0d578acb Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 31 Mar 2023 13:53:41 +0200 Subject: [PATCH 1/4] Removed smart_transaction_trimming and made sure that span descriptions are not trimmed --- sentry_sdk/client.py | 7 +- sentry_sdk/consts.py | 3 +- sentry_sdk/serializer.py | 129 ++---------------- .../sqlalchemy/test_sqlalchemy.py | 2 - 4 files changed, 17 insertions(+), 124 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e246f05363..2e73f60c9c 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -320,12 +320,7 @@ def _prepare_event( # Postprocess the event here so that annotated types do # generally not surface in before_send if event is not None: - event = serialize( - event, - smart_transaction_trimming=self.options["_experiments"].get( - "smart_transaction_trimming" - ), - ) + event = serialize(event) before_send = self.options["before_send"] if before_send is not None and event.get("type") != "transaction": diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 99f3ca4c1f..52e8b78548 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -33,8 +33,7 @@ { "max_spans": Optional[int], "record_sql_params": Optional[bool], - "smart_transaction_trimming": Optional[bool], - # TODO: Remvoe these 2 profiling related experiments + # TODO: Remove these 2 profiling related experiments "profiles_sample_rate": Optional[float], "profiler_mode": Optional[ProfilerMode], }, diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index 29495c3118..22eec490ae 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -8,13 +8,9 @@ capture_internal_exception, disable_capture_event, format_timestamp, - json_dumps, safe_repr, strip_string, ) - -import sentry_sdk.utils - from sentry_sdk._compat import ( text_type, PY2, @@ -23,12 +19,9 @@ iteritems, binary_sequence_types, ) - from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from datetime import timedelta - from types import TracebackType from typing import Any @@ -37,7 +30,6 @@ from typing import Dict from typing import List from typing import Optional - from typing import Tuple from typing import Type from typing import Union @@ -120,12 +112,11 @@ def __exit__( self._ids.pop(id(self._objs.pop()), None) -def serialize(event, smart_transaction_trimming=False, **kwargs): - # type: (Event, bool, **Any) -> Event +def serialize(event, **kwargs): + # type: (Event, **Any) -> Event memo = Memo() path = [] # type: List[Segment] meta_stack = [] # type: List[Dict[str, Any]] - span_description_bytes = [] # type: List[int] def _annotate(**meta): # type: (**Any) -> None @@ -365,113 +356,23 @@ def _serialize_node_impl( if not isinstance(obj, string_types): obj = safe_repr(obj) - # Allow span descriptions to be longer than other strings. - # - # For database auto-instrumented spans, the description contains - # potentially long SQL queries that are most useful when not truncated. - # Because arbitrarily large events may be discarded by the server as a - # protection mechanism, we dynamically limit the description length - # later in _truncate_span_descriptions. - if ( - smart_transaction_trimming - and len(path) == 3 - and path[0] == "spans" - and path[-1] == "description" - ): - span_description_bytes.append(len(obj)) + is_span_description = ( + len(path) == 3 and path[0] == "spans" and path[-1] == "description" + ) + if is_span_description: return obj - return _flatten_annotated(strip_string(obj)) - def _truncate_span_descriptions(serialized_event, event, excess_bytes): - # type: (Event, Event, int) -> None - """ - Modifies serialized_event in-place trying to remove excess_bytes from - span descriptions. The original event is used read-only to access the - span timestamps (represented as RFC3399-formatted strings in - serialized_event). - - It uses heuristics to prioritize preserving the description of spans - that might be the most interesting ones in terms of understanding and - optimizing performance. - """ - # When truncating a description, preserve a small prefix. - min_length = 10 - - def shortest_duration_longest_description_first(args): - # type: (Tuple[int, Span]) -> Tuple[timedelta, int] - i, serialized_span = args - span = event["spans"][i] - now = datetime.utcnow() - start = span.get("start_timestamp") or now - end = span.get("timestamp") or now - duration = end - start - description = serialized_span.get("description") or "" - return (duration, -len(description)) - - # Note: for simplicity we sort spans by exact duration and description - # length. If ever needed, we could have a more involved heuristic, e.g. - # replacing exact durations with "buckets" and/or looking at other span - # properties. - path.append("spans") - for i, span in sorted( - enumerate(serialized_event.get("spans") or []), - key=shortest_duration_longest_description_first, - ): - description = span.get("description") or "" - if len(description) <= min_length: - continue - excess_bytes -= len(description) - min_length - path.extend([i, "description"]) - # Note: the last time we call strip_string we could preserve a few - # more bytes up to a total length of MAX_EVENT_BYTES. Since that's - # not strictly required, we leave it out for now for simplicity. - span["description"] = _flatten_annotated( - strip_string(description, max_length=min_length) - ) - del path[-2:] - del meta_stack[len(path) + 1 :] - - if excess_bytes <= 0: - break - path.pop() - del meta_stack[len(path) + 1 :] + return _flatten_annotated(strip_string(obj)) + # + # Start of serialize() function + # disable_capture_event.set(True) try: - rv = _serialize_node(event, **kwargs) - if meta_stack and isinstance(rv, dict): - rv["_meta"] = meta_stack[0] - - sum_span_description_bytes = sum(span_description_bytes) - if smart_transaction_trimming and sum_span_description_bytes > 0: - span_count = len(event.get("spans") or []) - # This is an upper bound of how many bytes all descriptions would - # consume if the usual string truncation in _serialize_node_impl - # would have taken place, not accounting for the metadata attached - # as event["_meta"]. - descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH - - # If by not truncating descriptions we ended up with more bytes than - # per the usual string truncation, check if the event is too large - # and we need to truncate some descriptions. - # - # This is guarded with an if statement to avoid JSON-encoding the - # event unnecessarily. - if sum_span_description_bytes > descriptions_budget_bytes: - original_bytes = len(json_dumps(rv)) - excess_bytes = original_bytes - MAX_EVENT_BYTES - if excess_bytes > 0: - # Event is too large, will likely be discarded by the - # server. Trim it down before sending. - _truncate_span_descriptions(rv, event, excess_bytes) - - # Span descriptions truncated, set or reset _meta. - # - # We run the same code earlier because we want to account - # for _meta when calculating original_bytes, the number of - # bytes in the JSON-encoded event. - if meta_stack and isinstance(rv, dict): - rv["_meta"] = meta_stack[0] - return rv + serialized_event = _serialize_node(event, **kwargs) + if meta_stack and isinstance(serialized_event, dict): + serialized_event["_meta"] = meta_stack[0] + + return serialized_event finally: disable_capture_event.set(False) diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index e9d8c4e849..801fd8a8bd 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -143,7 +143,6 @@ def test_long_sql_query_preserved(sentry_init, capture_events): sentry_init( traces_sample_rate=1, integrations=[SqlalchemyIntegration()], - _experiments={"smart_transaction_trimming": True}, ) events = capture_events() @@ -162,7 +161,6 @@ def test_too_large_event_truncated(sentry_init, capture_events): sentry_init( traces_sample_rate=1, integrations=[SqlalchemyIntegration()], - _experiments={"smart_transaction_trimming": True}, ) events = capture_events() From bbf36a12fbbddf72bfd899427e13fc477acfc373 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 31 Mar 2023 14:32:54 +0200 Subject: [PATCH 2/4] Adapted tests --- scripts/runtox.sh | 2 +- .../sqlalchemy/test_sqlalchemy.py | 37 +++++++------------ tox.ini | 2 +- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/scripts/runtox.sh b/scripts/runtox.sh index 07db62242b..e2d61db13d 100755 --- a/scripts/runtox.sh +++ b/scripts/runtox.sh @@ -16,4 +16,4 @@ fi searchstring="$1" export TOX_PARALLEL_NO_SPINNER=1 -exec $TOXPATH -vv -p auto -e "$($TOXPATH -l | grep "$searchstring" | tr $'\n' ',')" -- "${@:2}" +exec $TOXPATH -vv -e "$($TOXPATH -l | grep "$searchstring" | tr $'\n' ',')" -- "${@:2}" diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index 801fd8a8bd..cd2346164f 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -9,7 +9,6 @@ from sentry_sdk import capture_message, start_transaction, configure_scope from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH -from sentry_sdk.serializer import MAX_EVENT_BYTES def test_orm_queries(sentry_init, capture_events): @@ -157,7 +156,7 @@ def test_long_sql_query_preserved(sentry_init, capture_events): assert description.endswith("SELECT 98 UNION SELECT 99") -def test_too_large_event_truncated(sentry_init, capture_events): +def test_large_event_not_truncated(sentry_init, capture_events): sentry_init( traces_sample_rate=1, integrations=[SqlalchemyIntegration()], @@ -176,36 +175,26 @@ def processor(event, hint): engine = create_engine("sqlite:///:memory:") with start_transaction(name="test"): with engine.connect() as con: - for _ in range(2000): + for _ in range(1500): con.execute(" UNION ".join("SELECT {}".format(i) for i in range(100))) (event,) = events - # Because of attached metadata in the "_meta" key, we may send out a little - # bit more than MAX_EVENT_BYTES. - max_bytes = 1.2 * MAX_EVENT_BYTES - assert len(json_dumps(event)) < max_bytes + assert len(json_dumps(event)) == 1845267 # Some spans are discarded. assert len(event["spans"]) == 1000 - for i, span in enumerate(event["spans"]): - description = span["description"] - - assert description.startswith("SELECT ") - if str(i) in event["_meta"]["spans"]: - # Description must have been truncated - assert len(description) == 10 - assert description.endswith("...") - else: - # Description was not truncated, check for original length - assert len(description) == 1583 - assert description.endswith("SELECT 98 UNION SELECT 99") - - # Smoke check the meta info for one of the spans. - assert next(iter(event["_meta"]["spans"].values())) == { - "description": {"": {"len": 1583, "rem": [["!limit", "x", 7, 10]]}} - } + # Span descriptions are not truncated. + description = event["spans"][0]["description"] + assert len(description) == 1583 + assert description.startswith("SELECT 0") + assert description.endswith("SELECT 98 UNION SELECT 99") + + description = event["spans"][999]["description"] + assert len(description) == 1583 + assert description.startswith("SELECT 0") + assert description.endswith("SELECT 98 UNION SELECT 99") # Smoke check that truncation of other fields has not changed. assert len(event["message"]) == MAX_STRING_LENGTH diff --git a/tox.ini b/tox.ini index 24d1cd3b40..8374b8a6d1 100644 --- a/tox.ini +++ b/tox.ini @@ -508,7 +508,7 @@ commands = ; when loading tests in scenarios. In particular, django fails to ; load the settings from the test module. {py2.7}: python -m pytest --ignore-glob='*py3.py' --durations=5 -vvv {env:TESTPATH} {posargs} - {py3.5,py3.6,py3.7,py3.8,py3.9,py3.10,py3.11}: python -m pytest --durations=5 -vvv {env:TESTPATH} {posargs} + {py3.5,py3.6,py3.7,py3.8,py3.9,py3.10,py3.11}: python -m pytest --durations=5 -s -vvv {env:TESTPATH} {posargs} [testenv:linters] commands = From f3a988023d621660539dc9321d30513b7c991c5a Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 31 Mar 2023 14:37:51 +0200 Subject: [PATCH 3/4] Check for event just being very big --- tests/integrations/sqlalchemy/test_sqlalchemy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index cd2346164f..d45ea36a19 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -8,6 +8,7 @@ from sentry_sdk import capture_message, start_transaction, configure_scope from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration +from sentry_sdk.serializer import MAX_EVENT_BYTES from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH @@ -180,7 +181,7 @@ def processor(event, hint): (event,) = events - assert len(json_dumps(event)) == 1845267 + assert len(json_dumps(event)) > MAX_EVENT_BYTES # Some spans are discarded. assert len(event["spans"]) == 1000 From f372a265f924d5ee29aa6d856c6ccfd25c873831 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 31 Mar 2023 14:48:41 +0200 Subject: [PATCH 4/4] Reverted some accidental changes --- scripts/runtox.sh | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/runtox.sh b/scripts/runtox.sh index e2d61db13d..07db62242b 100755 --- a/scripts/runtox.sh +++ b/scripts/runtox.sh @@ -16,4 +16,4 @@ fi searchstring="$1" export TOX_PARALLEL_NO_SPINNER=1 -exec $TOXPATH -vv -e "$($TOXPATH -l | grep "$searchstring" | tr $'\n' ',')" -- "${@:2}" +exec $TOXPATH -vv -p auto -e "$($TOXPATH -l | grep "$searchstring" | tr $'\n' ',')" -- "${@:2}" diff --git a/tox.ini b/tox.ini index 8374b8a6d1..24d1cd3b40 100644 --- a/tox.ini +++ b/tox.ini @@ -508,7 +508,7 @@ commands = ; when loading tests in scenarios. In particular, django fails to ; load the settings from the test module. {py2.7}: python -m pytest --ignore-glob='*py3.py' --durations=5 -vvv {env:TESTPATH} {posargs} - {py3.5,py3.6,py3.7,py3.8,py3.9,py3.10,py3.11}: python -m pytest --durations=5 -s -vvv {env:TESTPATH} {posargs} + {py3.5,py3.6,py3.7,py3.8,py3.9,py3.10,py3.11}: python -m pytest --durations=5 -vvv {env:TESTPATH} {posargs} [testenv:linters] commands =