python · pablogsal · Dec 11, 2025 · Dec 10, 2025 · Dec 11, 2025 · Dec 11, 2025
@@ -470,9 +470,10 @@ which you can use to judge whether the data is sufficient for your analysis.
 Profiling modes
 ===============
 
-The sampling profiler supports three modes that control which samples are
+The sampling profiler supports four modes that control which samples are
 recorded. The mode determines what the profile measures: total elapsed time,
-CPU execution time, or time spent holding the global interpreter lock.
+CPU execution time, time spent holding the global interpreter lock, or
+exception handling.
 
 
 Wall-clock mode
@@ -553,6 +554,67 @@ single-threaded programs to distinguish Python execution time from time spent
 in C extensions or I/O.
 
 
+Exception mode
+--------------
+
+Exception mode (``--mode=exception``) records samples only when a thread has
+an active exception::
+
+   python -m profiling.sampling run --mode=exception script.py
+
+Samples are recorded in two situations: when an exception is being propagated
+up the call stack (after ``raise`` but before being caught), or when code is
+executing inside an ``except`` block where exception information is still
+present in the thread state.
+
+The following example illustrates which code regions are captured:
+
+.. code-block:: python
+
+   def example():
+       try:
+           raise ValueError("error")    # Captured: exception being raised
+       except ValueError:
+           process_error()              # Captured: inside except block
+       finally:
+           cleanup()                    # NOT captured: exception already handled
+
+   def example_propagating():
+       try:
+           try:
+               raise ValueError("error")
+           finally:
+               cleanup()                # Captured: exception propagating through
+       except ValueError:
+           pass
+
+   def example_no_exception():
+       try:
+           do_work()
+       finally:
+           cleanup()                    # NOT captured: no exception involved
+
+Note that ``finally`` blocks are only captured when an exception is actively
+propagating through them. Once an ``except`` block finishes executing, Python
+clears the exception information before running any subsequent ``finally``
+block. Similarly, ``finally`` blocks that run during normal execution (when no
+exception was raised) are not captured because no exception state is present.
+
+This mode is useful for understanding where your program spends time handling
+errors. Exception handling can be a significant source of overhead in code
+that uses exceptions for flow control (such as ``StopIteration`` in iterators)
+or in applications that process many error conditions (such as network servers
+handling connection failures).
+
+Exception mode helps answer questions like "how much time is spent handling
+exceptions?" and "which exception handlers are the most expensive?" It can
+reveal hidden performance costs in code that catches and processes many
+exceptions, even when those exceptions are handled gracefully. For example,
+if a parsing library uses exceptions internally to signal format errors, this
+mode will capture time spent in those handlers even if the calling code never
+sees the exceptions.
+
+
 Output formats
 ==============
 
@@ -1006,8 +1068,9 @@ Mode options
 
 .. option:: --mode <mode>
 
-   Sampling mode: ``wall`` (default), ``cpu``, or ``gil``.
-   The ``cpu`` and ``gil`` modes are incompatible with ``--async-aware``.
+   Sampling mode: ``wall`` (default), ``cpu``, ``gil``, or ``exception``.
+   The ``cpu``, ``gil``, and ``exception`` modes are incompatible with
+   ``--async-aware``.
 
 .. option:: --async-mode <mode>
 

@@ -146,6 +146,8 @@ Key features include:
     and blocking. Use this to identify CPU-bound bottlenecks and optimize computational work.
   * **GIL-holding time** (``--mode gil``): Measures time spent holding Python's Global Interpreter
     Lock. Use this to identify which threads dominate GIL usage in multi-threaded applications.
+  * **Exception handling time** (``--mode exception``): Captures samples only from threads with
+    an active exception. Use this to analyze exception handling overhead.
 
 * **Thread-aware profiling**: Option to profile all threads (``-a``) or just the main thread,
   essential for understanding multi-threaded application behavior.
@@ -175,6 +177,10 @@ Key features include:
   (``--async-aware``). See which coroutines are consuming time, with options to show only
   running tasks or all tasks including those waiting.
 
+* **Opcode-level profiling**: Gather bytecode opcode information for instruction-level
+  profiling (``--opcodes``). Shows which bytecode instructions are executing, including
+  specializations from the adaptive interpreter.
+
 See :mod:`profiling.sampling` for the complete documentation, including all
 available output formats, profiling modes, and configuration options.
 

diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h
@@ -110,8 +110,15 @@ typedef struct _Py_DebugOffsets {
         uint64_t status;
         uint64_t holds_gil;
         uint64_t gil_requested;
+        uint64_t current_exception;
+        uint64_t exc_state;
     } thread_state;
 
+    // Exception stack item offset
+    struct {
+        uint64_t exc_value;
+    } err_stackitem;
+
     // InterpreterFrame offset;
     struct _interpreter_frame {
         uint64_t size;
@@ -282,6 +289,11 @@ typedef struct _Py_DebugOffsets {
         .status = offsetof(PyThreadState, _status), \
         .holds_gil = offsetof(PyThreadState, holds_gil), \
         .gil_requested = offsetof(PyThreadState, gil_requested), \
+        .current_exception = offsetof(PyThreadState, current_exception), \
+        .exc_state = offsetof(PyThreadState, exc_state), \
+    }, \
+    .err_stackitem = { \
+        .exc_value = offsetof(_PyErr_StackItem, exc_value), \
     }, \
     .interpreter_frame = { \
         .size = sizeof(_PyInterpreterFrame), \

@@ -717,6 +717,10 @@ function populateThreadStats(data, selectedThreadId = null) {
 
   const gcPctElem = document.getElementById('gc-pct');
   if (gcPctElem) gcPctElem.textContent = `${(threadStats.gc_pct || 0).toFixed(1)}%`;
+
+  // Exception stats
+  const excPctElem = document.getElementById('exc-pct');
+  if (excPctElem) excPctElem.textContent = `${(threadStats.has_exception_pct || 0).toFixed(1)}%`;
 }
 
 // ============================================================================

@@ -161,6 +161,10 @@ <h3 class="section-title">Runtime Stats</h3>
                     <div class="stat-tile-value" id="gc-pct">--</div>
                     <div class="stat-tile-label">GC</div>
                   </div>
+                  <div class="stat-tile stat-tile--red" id="exc-stat">
+                    <div class="stat-tile-value" id="exc-pct">--</div>
+                    <div class="stat-tile-label">Exception</div>
+                  </div>
                 </div>
               </div>
             </section>

@@ -16,6 +16,7 @@
     PROFILING_MODE_WALL,
     PROFILING_MODE_CPU,
     PROFILING_MODE_GIL,
+    PROFILING_MODE_EXCEPTION,
     SORT_MODE_NSAMPLES,
     SORT_MODE_TOTTIME,
     SORT_MODE_CUMTIME,
@@ -90,6 +91,7 @@ def _parse_mode(mode_string):
         "wall": PROFILING_MODE_WALL,
         "cpu": PROFILING_MODE_CPU,
         "gil": PROFILING_MODE_GIL,
+        "exception": PROFILING_MODE_EXCEPTION,
     }
     return mode_map[mode_string]
 
@@ -213,10 +215,12 @@ def _add_mode_options(parser):
     mode_group = parser.add_argument_group("Mode options")
     mode_group.add_argument(
         "--mode",
-        choices=["wall", "cpu", "gil"],
+        choices=["wall", "cpu", "gil", "exception"],
         default="wall",
         help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), "
-        "gil (only samples when thread holds the GIL). Incompatible with --async-aware",
+        "gil (only samples when thread holds the GIL), "
+        "exception (only samples when thread has an active exception). "
+        "Incompatible with --async-aware",
     )
     mode_group.add_argument(
         "--async-mode",

@@ -5,6 +5,7 @@
     THREAD_STATUS_ON_CPU,
     THREAD_STATUS_GIL_REQUESTED,
     THREAD_STATUS_UNKNOWN,
+    THREAD_STATUS_HAS_EXCEPTION,
 )
 
 try:
@@ -170,7 +171,7 @@ def _collect_thread_status_stats(self, stack_frames):
 
         Returns:
             tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats)
-                - aggregate_status_counts: dict with has_gil, on_cpu, etc.
+                - aggregate_status_counts: dict with has_gil, on_cpu, has_exception, etc.
                 - has_gc_frame: bool indicating if any thread has GC frames
                 - per_thread_stats: dict mapping thread_id to per-thread counts
         """
@@ -179,6 +180,7 @@ def _collect_thread_status_stats(self, stack_frames):
             "on_cpu": 0,
             "gil_requested": 0,
             "unknown": 0,
+            "has_exception": 0,
             "total": 0,
         }
         has_gc_frame = False
@@ -200,6 +202,8 @@ def _collect_thread_status_stats(self, stack_frames):
                     status_counts["gil_requested"] += 1
                 if status_flags & THREAD_STATUS_UNKNOWN:
                     status_counts["unknown"] += 1
+                if status_flags & THREAD_STATUS_HAS_EXCEPTION:
+                    status_counts["has_exception"] += 1
 
                 # Track per-thread statistics
                 thread_id = getattr(thread_info, "thread_id", None)
@@ -210,6 +214,7 @@ def _collect_thread_status_stats(self, stack_frames):
                             "on_cpu": 0,
                             "gil_requested": 0,
                             "unknown": 0,
+                            "has_exception": 0,
                             "total": 0,
                             "gc_samples": 0,
                         }
@@ -225,6 +230,8 @@ def _collect_thread_status_stats(self, stack_frames):
                         thread_stats["gil_requested"] += 1
                     if status_flags & THREAD_STATUS_UNKNOWN:
                         thread_stats["unknown"] += 1
+                    if status_flags & THREAD_STATUS_HAS_EXCEPTION:
+                        thread_stats["has_exception"] += 1
 
                     # Check for GC frames in this thread
                     frames = getattr(thread_info, "frame_info", None)

@@ -5,6 +5,7 @@
 PROFILING_MODE_CPU = 1
 PROFILING_MODE_GIL = 2
 PROFILING_MODE_ALL = 3  # Combines GIL + CPU checks
+PROFILING_MODE_EXCEPTION = 4  # Only samples when thread has an active exception
 
 # Sort mode constants
 SORT_MODE_NSAMPLES = 0
@@ -25,10 +26,12 @@
         THREAD_STATUS_ON_CPU,
         THREAD_STATUS_UNKNOWN,
         THREAD_STATUS_GIL_REQUESTED,
+        THREAD_STATUS_HAS_EXCEPTION,
     )
 except ImportError:
     # Fallback for tests or when module is not available
     THREAD_STATUS_HAS_GIL = (1 << 0)
     THREAD_STATUS_ON_CPU = (1 << 1)
     THREAD_STATUS_UNKNOWN = (1 << 2)
     THREAD_STATUS_GIL_REQUESTED = (1 << 3)
+    THREAD_STATUS_HAS_EXCEPTION = (1 << 4)
@@ -9,13 +9,14 @@
 from .collector import Collector
 from .opcode_utils import get_opcode_info, format_opcode
 try:
-    from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
+    from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION
 except ImportError:
     # Fallback if module not available (shouldn't happen in normal use)
     THREAD_STATUS_HAS_GIL = (1 << 0)
     THREAD_STATUS_ON_CPU = (1 << 1)
     THREAD_STATUS_UNKNOWN = (1 << 2)
     THREAD_STATUS_GIL_REQUESTED = (1 << 3)
+    THREAD_STATUS_HAS_EXCEPTION = (1 << 4)
 
 
 # Categories matching Firefox Profiler expectations
@@ -28,6 +29,7 @@
     {"name": "CPU", "color": "purple", "subcategories": ["Other"]},
     {"name": "Code Type", "color": "red", "subcategories": ["Other"]},
     {"name": "Opcodes", "color": "magenta", "subcategories": ["Other"]},
+    {"name": "Exception", "color": "lightblue", "subcategories": ["Other"]},
 ]
 
 # Category indices
@@ -39,6 +41,7 @@
 CATEGORY_CPU = 5
 CATEGORY_CODE_TYPE = 6
 CATEGORY_OPCODES = 7
+CATEGORY_EXCEPTION = 8
 
 # Subcategory indices
 DEFAULT_SUBCATEGORY = 0
@@ -88,6 +91,8 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
         self.python_code_start = {}       # Thread running Python code (has GIL)
         self.native_code_start = {}       # Thread running native code (on CPU without GIL)
         self.gil_wait_start = {}          # Thread waiting for GIL
+        self.exception_start = {}         # Thread has an exception set
+        self.no_exception_start = {}      # Thread has no exception set
 
         # GC event tracking: track GC start time per thread
         self.gc_start_per_thread = {}  # tid -> start_time
@@ -204,6 +209,13 @@ def collect(self, stack_frames):
                     self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
                                    current_time, CATEGORY_GIL)
 
+                # Track exception state (Has Exception / No Exception)
+                has_exception = bool(status_flags & THREAD_STATUS_HAS_EXCEPTION)
+                self._track_state_transition(
+                    tid, has_exception, self.exception_start, self.no_exception_start,
+                    "Has Exception", "No Exception", CATEGORY_EXCEPTION, current_time
+                )
+
                 # Track GC events by detecting <GC> frames in the stack trace
                 # This leverages the improved GC frame tracking from commit 336366fd7ca
                 # which precisely identifies the thread that initiated GC collection
@@ -622,6 +634,8 @@ def _finalize_markers(self):
             (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
             (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
             (self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
+            (self.exception_start, "Has Exception", CATEGORY_EXCEPTION),
+            (self.no_exception_start, "No Exception", CATEGORY_EXCEPTION),
         ]
 
         for state_dict, marker_name, category in marker_states:

@@ -17,6 +17,7 @@
     THREAD_STATUS_ON_CPU,
     THREAD_STATUS_UNKNOWN,
     THREAD_STATUS_GIL_REQUESTED,
+    THREAD_STATUS_HAS_EXCEPTION,
     PROFILING_MODE_CPU,
     PROFILING_MODE_GIL,
     PROFILING_MODE_WALL,
@@ -61,6 +62,7 @@ class ThreadData:
     on_cpu: int = 0
     gil_requested: int = 0
     unknown: int = 0
+    has_exception: int = 0
     total: int = 0  # Total status samples for this thread
 
     # Sample counts
@@ -82,6 +84,8 @@ def increment_status_flag(self, status_flags):
             self.gil_requested += 1
         if status_flags & THREAD_STATUS_UNKNOWN:
             self.unknown += 1
+        if status_flags & THREAD_STATUS_HAS_EXCEPTION:
+            self.has_exception += 1
         self.total += 1
 
     def as_status_dict(self):
@@ -91,6 +95,7 @@ def as_status_dict(self):
             "on_cpu": self.on_cpu,
             "gil_requested": self.gil_requested,
             "unknown": self.unknown,
+            "has_exception": self.has_exception,
             "total": self.total,
         }
 
@@ -160,6 +165,7 @@ def __init__(
             "on_cpu": 0,
             "gil_requested": 0,
             "unknown": 0,
+            "has_exception": 0,
             "total": 0,  # Total thread count across all samples
         }
         self.gc_frame_samples = 0  # Track samples with GC frames
@@ -359,6 +365,7 @@ def collect(self, stack_frames):
                 thread_data.on_cpu += stats.get("on_cpu", 0)
                 thread_data.gil_requested += stats.get("gil_requested", 0)
                 thread_data.unknown += stats.get("unknown", 0)
+                thread_data.has_exception += stats.get("has_exception", 0)
                 thread_data.total += stats.get("total", 0)
                 if stats.get("gc_samples", 0):
                     thread_data.gc_frame_samples += stats["gc_samples"]
@@ -723,6 +730,7 @@ def reset_stats(self):
             "on_cpu": 0,
             "gil_requested": 0,
             "unknown": 0,
+            "has_exception": 0,
             "total": 0,
         }
         self.gc_frame_samples = 0