diff --git a/Doc/library/profiling.sampling.rst b/Doc/library/profiling.sampling.rst index 23e9173a815d22..2bad2a8d1ab353 100644 --- a/Doc/library/profiling.sampling.rst +++ b/Doc/library/profiling.sampling.rst @@ -470,9 +470,10 @@ which you can use to judge whether the data is sufficient for your analysis. Profiling modes =============== -The sampling profiler supports three modes that control which samples are +The sampling profiler supports four modes that control which samples are recorded. The mode determines what the profile measures: total elapsed time, -CPU execution time, or time spent holding the global interpreter lock. +CPU execution time, time spent holding the global interpreter lock, or +exception handling. Wall-clock mode @@ -553,6 +554,67 @@ single-threaded programs to distinguish Python execution time from time spent in C extensions or I/O. +Exception mode +-------------- + +Exception mode (``--mode=exception``) records samples only when a thread has +an active exception:: + + python -m profiling.sampling run --mode=exception script.py + +Samples are recorded in two situations: when an exception is being propagated +up the call stack (after ``raise`` but before being caught), or when code is +executing inside an ``except`` block where exception information is still +present in the thread state. + +The following example illustrates which code regions are captured: + +.. code-block:: python + + def example(): + try: + raise ValueError("error") # Captured: exception being raised + except ValueError: + process_error() # Captured: inside except block + finally: + cleanup() # NOT captured: exception already handled + + def example_propagating(): + try: + try: + raise ValueError("error") + finally: + cleanup() # Captured: exception propagating through + except ValueError: + pass + + def example_no_exception(): + try: + do_work() + finally: + cleanup() # NOT captured: no exception involved + +Note that ``finally`` blocks are only captured when an exception is actively +propagating through them. Once an ``except`` block finishes executing, Python +clears the exception information before running any subsequent ``finally`` +block. Similarly, ``finally`` blocks that run during normal execution (when no +exception was raised) are not captured because no exception state is present. + +This mode is useful for understanding where your program spends time handling +errors. Exception handling can be a significant source of overhead in code +that uses exceptions for flow control (such as ``StopIteration`` in iterators) +or in applications that process many error conditions (such as network servers +handling connection failures). + +Exception mode helps answer questions like "how much time is spent handling +exceptions?" and "which exception handlers are the most expensive?" It can +reveal hidden performance costs in code that catches and processes many +exceptions, even when those exceptions are handled gracefully. For example, +if a parsing library uses exceptions internally to signal format errors, this +mode will capture time spent in those handlers even if the calling code never +sees the exceptions. + + Output formats ============== @@ -1006,8 +1068,9 @@ Mode options .. option:: --mode - Sampling mode: ``wall`` (default), ``cpu``, or ``gil``. - The ``cpu`` and ``gil`` modes are incompatible with ``--async-aware``. + Sampling mode: ``wall`` (default), ``cpu``, ``gil``, or ``exception``. + The ``cpu``, ``gil``, and ``exception`` modes are incompatible with + ``--async-aware``. .. option:: --async-mode diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 853c47d4402f20..9d4686f982a99a 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -146,6 +146,8 @@ Key features include: and blocking. Use this to identify CPU-bound bottlenecks and optimize computational work. * **GIL-holding time** (``--mode gil``): Measures time spent holding Python's Global Interpreter Lock. Use this to identify which threads dominate GIL usage in multi-threaded applications. + * **Exception handling time** (``--mode exception``): Captures samples only from threads with + an active exception. Use this to analyze exception handling overhead. * **Thread-aware profiling**: Option to profile all threads (``-a``) or just the main thread, essential for understanding multi-threaded application behavior. @@ -175,6 +177,10 @@ Key features include: (``--async-aware``). See which coroutines are consuming time, with options to show only running tasks or all tasks including those waiting. +* **Opcode-level profiling**: Gather bytecode opcode information for instruction-level + profiling (``--opcodes``). Shows which bytecode instructions are executing, including + specializations from the adaptive interpreter. + See :mod:`profiling.sampling` for the complete documentation, including all available output formats, profiling modes, and configuration options. diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 1cdc4449b173e8..66f14e69f33f44 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -110,8 +110,15 @@ typedef struct _Py_DebugOffsets { uint64_t status; uint64_t holds_gil; uint64_t gil_requested; + uint64_t current_exception; + uint64_t exc_state; } thread_state; + // Exception stack item offset + struct { + uint64_t exc_value; + } err_stackitem; + // InterpreterFrame offset; struct _interpreter_frame { uint64_t size; @@ -282,6 +289,11 @@ typedef struct _Py_DebugOffsets { .status = offsetof(PyThreadState, _status), \ .holds_gil = offsetof(PyThreadState, holds_gil), \ .gil_requested = offsetof(PyThreadState, gil_requested), \ + .current_exception = offsetof(PyThreadState, current_exception), \ + .exc_state = offsetof(PyThreadState, exc_state), \ + }, \ + .err_stackitem = { \ + .exc_value = offsetof(_PyErr_StackItem, exc_value), \ }, \ .interpreter_frame = { \ .size = sizeof(_PyInterpreterFrame), \ diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index 3076edd1d68cba..0370c18a25049f 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -717,6 +717,10 @@ function populateThreadStats(data, selectedThreadId = null) { const gcPctElem = document.getElementById('gc-pct'); if (gcPctElem) gcPctElem.textContent = `${(threadStats.gc_pct || 0).toFixed(1)}%`; + + // Exception stats + const excPctElem = document.getElementById('exc-pct'); + if (excPctElem) excPctElem.textContent = `${(threadStats.has_exception_pct || 0).toFixed(1)}%`; } // ============================================================================ diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html index 82102c229e7af9..29e5fdd3f35069 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html @@ -161,6 +161,10 @@

Runtime Stats

--
GC
+
+
--
+
Exception
+
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 22bfce8c2ead99..3a0444db4c3636 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -16,6 +16,7 @@ PROFILING_MODE_WALL, PROFILING_MODE_CPU, PROFILING_MODE_GIL, + PROFILING_MODE_EXCEPTION, SORT_MODE_NSAMPLES, SORT_MODE_TOTTIME, SORT_MODE_CUMTIME, @@ -90,6 +91,7 @@ def _parse_mode(mode_string): "wall": PROFILING_MODE_WALL, "cpu": PROFILING_MODE_CPU, "gil": PROFILING_MODE_GIL, + "exception": PROFILING_MODE_EXCEPTION, } return mode_map[mode_string] @@ -213,10 +215,12 @@ def _add_mode_options(parser): mode_group = parser.add_argument_group("Mode options") mode_group.add_argument( "--mode", - choices=["wall", "cpu", "gil"], + choices=["wall", "cpu", "gil", "exception"], default="wall", help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), " - "gil (only samples when thread holds the GIL). Incompatible with --async-aware", + "gil (only samples when thread holds the GIL), " + "exception (only samples when thread has an active exception). " + "Incompatible with --async-aware", ) mode_group.add_argument( "--async-mode", diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 22055cf84007b6..a1f6ec190f6556 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -5,6 +5,7 @@ THREAD_STATUS_ON_CPU, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_UNKNOWN, + THREAD_STATUS_HAS_EXCEPTION, ) try: @@ -170,7 +171,7 @@ def _collect_thread_status_stats(self, stack_frames): Returns: tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats) - - aggregate_status_counts: dict with has_gil, on_cpu, etc. + - aggregate_status_counts: dict with has_gil, on_cpu, has_exception, etc. - has_gc_frame: bool indicating if any thread has GC frames - per_thread_stats: dict mapping thread_id to per-thread counts """ @@ -179,6 +180,7 @@ def _collect_thread_status_stats(self, stack_frames): "on_cpu": 0, "gil_requested": 0, "unknown": 0, + "has_exception": 0, "total": 0, } has_gc_frame = False @@ -200,6 +202,8 @@ def _collect_thread_status_stats(self, stack_frames): status_counts["gil_requested"] += 1 if status_flags & THREAD_STATUS_UNKNOWN: status_counts["unknown"] += 1 + if status_flags & THREAD_STATUS_HAS_EXCEPTION: + status_counts["has_exception"] += 1 # Track per-thread statistics thread_id = getattr(thread_info, "thread_id", None) @@ -210,6 +214,7 @@ def _collect_thread_status_stats(self, stack_frames): "on_cpu": 0, "gil_requested": 0, "unknown": 0, + "has_exception": 0, "total": 0, "gc_samples": 0, } @@ -225,6 +230,8 @@ def _collect_thread_status_stats(self, stack_frames): thread_stats["gil_requested"] += 1 if status_flags & THREAD_STATUS_UNKNOWN: thread_stats["unknown"] += 1 + if status_flags & THREAD_STATUS_HAS_EXCEPTION: + thread_stats["has_exception"] += 1 # Check for GC frames in this thread frames = getattr(thread_info, "frame_info", None) diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py index b05f1703c8505f..34b85ba4b3c61d 100644 --- a/Lib/profiling/sampling/constants.py +++ b/Lib/profiling/sampling/constants.py @@ -5,6 +5,7 @@ PROFILING_MODE_CPU = 1 PROFILING_MODE_GIL = 2 PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks +PROFILING_MODE_EXCEPTION = 4 # Only samples when thread has an active exception # Sort mode constants SORT_MODE_NSAMPLES = 0 @@ -25,6 +26,7 @@ THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, + THREAD_STATUS_HAS_EXCEPTION, ) except ImportError: # Fallback for tests or when module is not available @@ -32,3 +34,4 @@ THREAD_STATUS_ON_CPU = (1 << 1) THREAD_STATUS_UNKNOWN = (1 << 2) THREAD_STATUS_GIL_REQUESTED = (1 << 3) + THREAD_STATUS_HAS_EXCEPTION = (1 << 4) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index b25ee079dd6ce9..608a15da483729 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -9,13 +9,14 @@ from .collector import Collector from .opcode_utils import get_opcode_info, format_opcode try: - from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED + from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION except ImportError: # Fallback if module not available (shouldn't happen in normal use) THREAD_STATUS_HAS_GIL = (1 << 0) THREAD_STATUS_ON_CPU = (1 << 1) THREAD_STATUS_UNKNOWN = (1 << 2) THREAD_STATUS_GIL_REQUESTED = (1 << 3) + THREAD_STATUS_HAS_EXCEPTION = (1 << 4) # Categories matching Firefox Profiler expectations @@ -28,6 +29,7 @@ {"name": "CPU", "color": "purple", "subcategories": ["Other"]}, {"name": "Code Type", "color": "red", "subcategories": ["Other"]}, {"name": "Opcodes", "color": "magenta", "subcategories": ["Other"]}, + {"name": "Exception", "color": "lightblue", "subcategories": ["Other"]}, ] # Category indices @@ -39,6 +41,7 @@ CATEGORY_CPU = 5 CATEGORY_CODE_TYPE = 6 CATEGORY_OPCODES = 7 +CATEGORY_EXCEPTION = 8 # Subcategory indices DEFAULT_SUBCATEGORY = 0 @@ -88,6 +91,8 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False): self.python_code_start = {} # Thread running Python code (has GIL) self.native_code_start = {} # Thread running native code (on CPU without GIL) self.gil_wait_start = {} # Thread waiting for GIL + self.exception_start = {} # Thread has an exception set + self.no_exception_start = {} # Thread has no exception set # GC event tracking: track GC start time per thread self.gc_start_per_thread = {} # tid -> start_time @@ -204,6 +209,13 @@ def collect(self, stack_frames): self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid), current_time, CATEGORY_GIL) + # Track exception state (Has Exception / No Exception) + has_exception = bool(status_flags & THREAD_STATUS_HAS_EXCEPTION) + self._track_state_transition( + tid, has_exception, self.exception_start, self.no_exception_start, + "Has Exception", "No Exception", CATEGORY_EXCEPTION, current_time + ) + # Track GC events by detecting frames in the stack trace # This leverages the improved GC frame tracking from commit 336366fd7ca # which precisely identifies the thread that initiated GC collection @@ -622,6 +634,8 @@ def _finalize_markers(self): (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE), (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL), (self.gc_start_per_thread, "GC Collecting", CATEGORY_GC), + (self.exception_start, "Has Exception", CATEGORY_EXCEPTION), + (self.no_exception_start, "No Exception", CATEGORY_EXCEPTION), ] for state_dict, marker_name, category in marker_states: diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py index 3d25b5969835c0..1652089ad3f52d 100644 --- a/Lib/profiling/sampling/live_collector/collector.py +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -17,6 +17,7 @@ THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, + THREAD_STATUS_HAS_EXCEPTION, PROFILING_MODE_CPU, PROFILING_MODE_GIL, PROFILING_MODE_WALL, @@ -61,6 +62,7 @@ class ThreadData: on_cpu: int = 0 gil_requested: int = 0 unknown: int = 0 + has_exception: int = 0 total: int = 0 # Total status samples for this thread # Sample counts @@ -82,6 +84,8 @@ def increment_status_flag(self, status_flags): self.gil_requested += 1 if status_flags & THREAD_STATUS_UNKNOWN: self.unknown += 1 + if status_flags & THREAD_STATUS_HAS_EXCEPTION: + self.has_exception += 1 self.total += 1 def as_status_dict(self): @@ -91,6 +95,7 @@ def as_status_dict(self): "on_cpu": self.on_cpu, "gil_requested": self.gil_requested, "unknown": self.unknown, + "has_exception": self.has_exception, "total": self.total, } @@ -160,6 +165,7 @@ def __init__( "on_cpu": 0, "gil_requested": 0, "unknown": 0, + "has_exception": 0, "total": 0, # Total thread count across all samples } self.gc_frame_samples = 0 # Track samples with GC frames @@ -359,6 +365,7 @@ def collect(self, stack_frames): thread_data.on_cpu += stats.get("on_cpu", 0) thread_data.gil_requested += stats.get("gil_requested", 0) thread_data.unknown += stats.get("unknown", 0) + thread_data.has_exception += stats.get("has_exception", 0) thread_data.total += stats.get("total", 0) if stats.get("gc_samples", 0): thread_data.gc_frame_samples += stats["gc_samples"] @@ -723,6 +730,7 @@ def reset_stats(self): "on_cpu": 0, "gil_requested": 0, "unknown": 0, + "has_exception": 0, "total": 0, } self.gc_frame_samples = 0 diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py index 869405671ffeed..8f72f69b057628 100644 --- a/Lib/profiling/sampling/live_collector/widgets.py +++ b/Lib/profiling/sampling/live_collector/widgets.py @@ -389,6 +389,7 @@ def draw_thread_status(self, line, width): pct_on_gil = (status_counts["has_gil"] / total_threads) * 100 pct_off_gil = 100.0 - pct_on_gil pct_gil_requested = (status_counts["gil_requested"] / total_threads) * 100 + pct_exception = (status_counts.get("has_exception", 0) / total_threads) * 100 # Get GC percentage based on view mode if thread_data: @@ -427,6 +428,17 @@ def draw_thread_status(self, line, width): add_separator=True, ) + # Show exception stats + if col < width - 15: + col = self._add_percentage_stat( + line, + col, + pct_exception, + "exc", + self.colors["red"], + add_separator=(col > 11), + ) + # Always show GC stats if col < width - 15: col = self._add_percentage_stat( diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index d5b8e21134ca18..294ec3003fc6bc 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -17,6 +17,7 @@ PROFILING_MODE_CPU, PROFILING_MODE_GIL, PROFILING_MODE_ALL, + PROFILING_MODE_EXCEPTION, ) try: from .live_collector import LiveStatsCollector @@ -300,7 +301,8 @@ def sample( all_threads: Whether to sample all threads realtime_stats: Whether to print real-time sampling statistics mode: Profiling mode - WALL (all samples), CPU (only when on CPU), - GIL (only when holding GIL), ALL (includes GIL and CPU status) + GIL (only when holding GIL), ALL (includes GIL and CPU status), + EXCEPTION (only when thread has an active exception) native: Whether to include native frames gc: Whether to include GC frames opcodes: Whether to include opcode information @@ -360,7 +362,8 @@ def sample_live( all_threads: Whether to sample all threads realtime_stats: Whether to print real-time sampling statistics mode: Profiling mode - WALL (all samples), CPU (only when on CPU), - GIL (only when holding GIL), ALL (includes GIL and CPU status) + GIL (only when holding GIL), ALL (includes GIL and CPU status), + EXCEPTION (only when thread has an active exception) native: Whether to include native frames gc: Whether to include GC frames opcodes: Whether to include opcode information diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index e5b86719f00b01..b7aa7f5ff82da3 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -87,12 +87,13 @@ def __init__(self, *args, **kwargs): "on_cpu": 0, "gil_requested": 0, "unknown": 0, + "has_exception": 0, "total": 0, } self.samples_with_gc_frames = 0 # Per-thread statistics - self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}} + self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, has_exception, total, gc_samples}} def collect(self, stack_frames, skip_idle=False): """Override to track thread status statistics before processing frames.""" @@ -118,6 +119,7 @@ def collect(self, stack_frames, skip_idle=False): "on_cpu": 0, "gil_requested": 0, "unknown": 0, + "has_exception": 0, "total": 0, "gc_samples": 0, } @@ -247,12 +249,16 @@ def convert_children(children, min_samples): } # Calculate thread status percentages for display + import sysconfig + is_free_threaded = bool(sysconfig.get_config_var("Py_GIL_DISABLED")) total_threads = max(1, self.thread_status_counts["total"]) thread_stats = { "has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100, "on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100, "gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100, + "has_exception_pct": (self.thread_status_counts["has_exception"] / total_threads) * 100, "gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100, + "free_threaded": is_free_threaded, **self.thread_status_counts } @@ -265,6 +271,7 @@ def convert_children(children, min_samples): "has_gil_pct": (stats["has_gil"] / total) * 100, "on_cpu_pct": (stats["on_cpu"] / total) * 100, "gil_requested_pct": (stats["gil_requested"] / total) * 100, + "has_exception_pct": (stats["has_exception"] / total) * 100, "gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100, **stats } diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 365beec49497a8..4f3beb15f53b33 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -26,11 +26,13 @@ PROFILING_MODE_CPU = 1 PROFILING_MODE_GIL = 2 PROFILING_MODE_ALL = 3 +PROFILING_MODE_EXCEPTION = 4 # Thread status flags THREAD_STATUS_HAS_GIL = 1 << 0 THREAD_STATUS_ON_CPU = 1 << 1 THREAD_STATUS_UNKNOWN = 1 << 2 +THREAD_STATUS_HAS_EXCEPTION = 1 << 4 # Maximum number of retry attempts for operations that may fail transiently MAX_TRIES = 10 @@ -2260,6 +2262,412 @@ def busy_thread(): finally: _cleanup_sockets(*client_sockets, server_socket) + def _make_exception_test_script(self, port): + """Create script with exception and normal threads for testing.""" + return textwrap.dedent( + f"""\ + import socket + import threading + import time + + def exception_thread(): + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"exception:" + str(threading.get_native_id()).encode()) + try: + raise ValueError("test exception") + except ValueError: + while True: + time.sleep(0.01) + + def normal_thread(): + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"normal:" + str(threading.get_native_id()).encode()) + while True: + sum(range(1000)) + + t1 = threading.Thread(target=exception_thread) + t2 = threading.Thread(target=normal_thread) + t1.start() + t2.start() + t1.join() + t2.join() + """ + ) + + @contextmanager + def _run_exception_test_process(self): + """Context manager to run exception test script and yield thread IDs and process.""" + port = find_unused_port() + script = self._make_exception_test_script(port) + + with os_helper.temp_dir() as tmp_dir: + script_file = make_script(tmp_dir, "script", script) + server_socket = _create_server_socket(port, backlog=2) + client_sockets = [] + + try: + with _managed_subprocess([sys.executable, script_file]) as p: + exception_tid = None + normal_tid = None + + for _ in range(2): + client_socket, _ = server_socket.accept() + client_sockets.append(client_socket) + line = client_socket.recv(1024) + if line: + if line.startswith(b"exception:"): + try: + exception_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass + elif line.startswith(b"normal:"): + try: + normal_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass + + server_socket.close() + server_socket = None + + yield p, exception_tid, normal_tid + finally: + _cleanup_sockets(*client_sockets, server_socket) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_thread_status_exception_detection(self): + """Test that THREAD_STATUS_HAS_EXCEPTION is set when thread has an active exception.""" + with self._run_exception_test_process() as (p, exception_tid, normal_tid): + self.assertIsNotNone(exception_tid, "Exception thread id not received") + self.assertIsNotNone(normal_tid, "Normal thread id not received") + + statuses = {} + try: + unwinder = RemoteUnwinder( + p.pid, + all_threads=True, + mode=PROFILING_MODE_ALL, + skip_non_matching_threads=False, + ) + for _ in range(MAX_TRIES): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + + if ( + exception_tid in statuses + and normal_tid in statuses + and (statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION) + and not (statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION) + ): + break + time.sleep(0.5) + except PermissionError: + self.skipTest("Insufficient permissions to read the stack trace") + + self.assertIn(exception_tid, statuses) + self.assertIn(normal_tid, statuses) + self.assertTrue( + statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION, + "Exception thread should have HAS_EXCEPTION flag", + ) + self.assertFalse( + statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION, + "Normal thread should not have HAS_EXCEPTION flag", + ) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_thread_status_exception_mode_filtering(self): + """Test that PROFILING_MODE_EXCEPTION correctly filters threads.""" + with self._run_exception_test_process() as (p, exception_tid, normal_tid): + self.assertIsNotNone(exception_tid, "Exception thread id not received") + self.assertIsNotNone(normal_tid, "Normal thread id not received") + + try: + unwinder = RemoteUnwinder( + p.pid, + all_threads=True, + mode=PROFILING_MODE_EXCEPTION, + skip_non_matching_threads=True, + ) + for _ in range(MAX_TRIES): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + + if exception_tid in statuses: + self.assertNotIn( + normal_tid, + statuses, + "Normal thread should be filtered out in exception mode", + ) + return + time.sleep(0.5) + except PermissionError: + self.skipTest("Insufficient permissions to read the stack trace") + + self.fail("Never found exception thread in exception mode") + +class TestExceptionDetectionScenarios(RemoteInspectionTestBase): + """Test exception detection across all scenarios. + + This class verifies the exact conditions under which THREAD_STATUS_HAS_EXCEPTION + is set. Each test covers a specific scenario: + + 1. except_block: Thread inside except block + -> SHOULD have HAS_EXCEPTION (exc_info->exc_value is set) + + 2. finally_propagating: Exception propagating through finally block + -> SHOULD have HAS_EXCEPTION (current_exception is set) + + 3. finally_after_except: Finally block after except handled exception + -> Should NOT have HAS_EXCEPTION (exc_info cleared after except) + + 4. finally_no_exception: Finally block with no exception raised + -> Should NOT have HAS_EXCEPTION (no exception state) + """ + + def _make_single_scenario_script(self, port, scenario): + """Create script for a single exception scenario.""" + scenarios = { + "except_block": f"""\ +import socket +import threading +import time + +def target_thread(): + '''Inside except block - exception info is present''' + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"ready:" + str(threading.get_native_id()).encode()) + try: + raise ValueError("test") + except ValueError: + while True: + time.sleep(0.01) + +t = threading.Thread(target=target_thread) +t.start() +t.join() +""", + "finally_propagating": f"""\ +import socket +import threading +import time + +def target_thread(): + '''Exception propagating through finally - current_exception is set''' + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"ready:" + str(threading.get_native_id()).encode()) + try: + try: + raise ValueError("propagating") + finally: + # Exception is propagating through here + while True: + time.sleep(0.01) + except: + pass # Never reached due to infinite loop + +t = threading.Thread(target=target_thread) +t.start() +t.join() +""", + "finally_after_except": f"""\ +import socket +import threading +import time + +def target_thread(): + '''Finally runs after except handled - exc_info is cleared''' + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"ready:" + str(threading.get_native_id()).encode()) + try: + raise ValueError("test") + except ValueError: + pass # Exception caught and handled + finally: + while True: + time.sleep(0.01) + +t = threading.Thread(target=target_thread) +t.start() +t.join() +""", + "finally_no_exception": f"""\ +import socket +import threading +import time + +def target_thread(): + '''Finally with no exception at all''' + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"ready:" + str(threading.get_native_id()).encode()) + try: + pass # No exception + finally: + while True: + time.sleep(0.01) + +t = threading.Thread(target=target_thread) +t.start() +t.join() +""", + } + + return scenarios[scenario] + + @contextmanager + def _run_scenario_process(self, scenario): + """Context manager to run a single scenario and yield thread ID and process.""" + port = find_unused_port() + script = self._make_single_scenario_script(port, scenario) + + with os_helper.temp_dir() as tmp_dir: + script_file = make_script(tmp_dir, "script", script) + server_socket = _create_server_socket(port, backlog=1) + client_socket = None + + try: + with _managed_subprocess([sys.executable, script_file]) as p: + thread_tid = None + + client_socket, _ = server_socket.accept() + line = client_socket.recv(1024) + if line and line.startswith(b"ready:"): + try: + thread_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass + + server_socket.close() + server_socket = None + + yield p, thread_tid + finally: + _cleanup_sockets(client_socket, server_socket) + + def _check_exception_status(self, p, thread_tid, expect_exception): + """Helper to check if thread has expected exception status.""" + try: + unwinder = RemoteUnwinder( + p.pid, + all_threads=True, + mode=PROFILING_MODE_ALL, + skip_non_matching_threads=False, + ) + + # Collect multiple samples for reliability + results = [] + for _ in range(MAX_TRIES): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + + if thread_tid in statuses: + has_exc = bool(statuses[thread_tid] & THREAD_STATUS_HAS_EXCEPTION) + results.append(has_exc) + + if len(results) >= 3: + break + + time.sleep(0.2) + + # Check majority of samples match expected + if not results: + self.fail("Never found target thread in stack traces") + + majority = sum(results) > len(results) // 2 + if expect_exception: + self.assertTrue( + majority, + f"Thread should have HAS_EXCEPTION flag, got {results}" + ) + else: + self.assertFalse( + majority, + f"Thread should NOT have HAS_EXCEPTION flag, got {results}" + ) + + except PermissionError: + self.skipTest("Insufficient permissions to read the stack trace") + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_except_block_has_exception(self): + """Test that thread inside except block has HAS_EXCEPTION flag. + + When a thread is executing inside an except block, exc_info->exc_value + is set, so THREAD_STATUS_HAS_EXCEPTION should be True. + """ + with self._run_scenario_process("except_block") as (p, thread_tid): + self.assertIsNotNone(thread_tid, "Thread ID not received") + self._check_exception_status(p, thread_tid, expect_exception=True) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_finally_propagating_has_exception(self): + """Test that finally block with propagating exception has HAS_EXCEPTION flag. + + When an exception is propagating through a finally block (not yet caught), + current_exception is set, so THREAD_STATUS_HAS_EXCEPTION should be True. + """ + with self._run_scenario_process("finally_propagating") as (p, thread_tid): + self.assertIsNotNone(thread_tid, "Thread ID not received") + self._check_exception_status(p, thread_tid, expect_exception=True) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_finally_after_except_no_exception(self): + """Test that finally block after except has NO HAS_EXCEPTION flag. + + When a finally block runs after an except block has handled the exception, + Python clears exc_info before entering finally, so THREAD_STATUS_HAS_EXCEPTION + should be False. + """ + with self._run_scenario_process("finally_after_except") as (p, thread_tid): + self.assertIsNotNone(thread_tid, "Thread ID not received") + self._check_exception_status(p, thread_tid, expect_exception=False) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_finally_no_exception_no_flag(self): + """Test that finally block with no exception has NO HAS_EXCEPTION flag. + + When a finally block runs during normal execution (no exception raised), + there is no exception state, so THREAD_STATUS_HAS_EXCEPTION should be False. + """ + with self._run_scenario_process("finally_no_exception") as (p, thread_tid): + self.assertIsNotNone(thread_tid, "Thread ID not received") + self._check_exception_status(p, thread_tid, expect_exception=False) + class TestFrameCaching(RemoteInspectionTestBase): """Test that frame caching produces correct results. diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py index c0457ee7eb8357..c086fbb572b256 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py @@ -427,7 +427,198 @@ def test_parse_mode_function(self): self.assertEqual(_parse_mode("wall"), 0) self.assertEqual(_parse_mode("cpu"), 1) self.assertEqual(_parse_mode("gil"), 2) + self.assertEqual(_parse_mode("exception"), 4) # Test invalid mode raises KeyError with self.assertRaises(KeyError): _parse_mode("invalid") + + +class TestExceptionModeFiltering(unittest.TestCase): + """Test exception mode filtering functionality (--mode=exception).""" + + def test_exception_mode_validation(self): + """Test that CLI accepts exception mode choice correctly.""" + from profiling.sampling.cli import main + + test_args = [ + "profiling.sampling.cli", + "attach", + "12345", + "--mode", + "exception", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + try: + main() + except (SystemExit, OSError, RuntimeError): + pass # Expected due to invalid PID + + # Should have attempted to call sample with mode=4 (exception mode) + mock_sample.assert_called_once() + call_args = mock_sample.call_args + # Check the mode parameter (should be in kwargs) + self.assertEqual(call_args.kwargs.get("mode"), 4) # PROFILING_MODE_EXCEPTION + + def test_exception_mode_sample_function_call(self): + """Test that sample() function correctly uses exception mode.""" + with ( + mock.patch( + "profiling.sampling.sample.SampleProfiler" + ) as mock_profiler, + ): + # Mock the profiler instance + mock_instance = mock.Mock() + mock_profiler.return_value = mock_instance + + # Create a real collector instance + collector = PstatsCollector(sample_interval_usec=1000, skip_idle=True) + + # Call sample with exception mode + profiling.sampling.sample.sample( + 12345, + collector, + mode=4, # PROFILING_MODE_EXCEPTION + duration_sec=1, + ) + + # Verify SampleProfiler was created with correct mode + mock_profiler.assert_called_once() + call_args = mock_profiler.call_args + self.assertEqual(call_args[1]["mode"], 4) # mode parameter + + # Verify profiler.sample was called + mock_instance.sample.assert_called_once() + + def test_exception_mode_cli_argument_parsing(self): + """Test CLI argument parsing for exception mode with various options.""" + from profiling.sampling.cli import main + + test_args = [ + "profiling.sampling.cli", + "attach", + "12345", + "--mode", + "exception", + "-i", + "500", + "-d", + "5", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + try: + main() + except (SystemExit, OSError, RuntimeError): + pass # Expected due to invalid PID + + # Verify all arguments were parsed correctly + mock_sample.assert_called_once() + call_args = mock_sample.call_args + self.assertEqual(call_args.kwargs.get("mode"), 4) # exception mode + self.assertEqual(call_args.kwargs.get("duration_sec"), 5) + + def test_exception_mode_constants_are_defined(self): + """Test that exception mode constant is properly defined.""" + from profiling.sampling.constants import PROFILING_MODE_EXCEPTION + self.assertEqual(PROFILING_MODE_EXCEPTION, 4) + + @requires_subprocess() + def test_exception_mode_integration_filtering(self): + """Integration test: Exception mode should only capture threads with active exceptions.""" + # Script with one thread handling an exception and one normal thread + exception_vs_normal_script = """ +import time +import threading + +exception_ready = threading.Event() + +def normal_worker(): + x = 0 + while True: + x += 1 + +def exception_handling_worker(): + try: + raise ValueError("test exception") + except ValueError: + # Signal AFTER entering except block, then do CPU work + exception_ready.set() + x = 0 + while True: + x += 1 + +normal_thread = threading.Thread(target=normal_worker) +exception_thread = threading.Thread(target=exception_handling_worker) +normal_thread.start() +exception_thread.start() +exception_ready.wait() +_test_sock.sendall(b"working") +normal_thread.join() +exception_thread.join() +""" + with test_subprocess(exception_vs_normal_script, wait_for_working=True) as subproc: + + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=5000, skip_idle=True) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2.0, + mode=4, # Exception mode + all_threads=True, + ) + collector.print_stats(show_summary=False, mode=4) + except (PermissionError, RuntimeError) as e: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + exception_mode_output = captured_output.getvalue() + + # Test wall-clock mode (mode=0) - should capture both functions + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=5000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2.0, + mode=0, # Wall-clock mode + all_threads=True, + ) + collector.print_stats(show_summary=False) + except (PermissionError, RuntimeError) as e: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + wall_mode_output = captured_output.getvalue() + + # Verify both modes captured samples + self.assertIn("Captured", exception_mode_output) + self.assertIn("samples", exception_mode_output) + self.assertIn("Captured", wall_mode_output) + self.assertIn("samples", wall_mode_output) + + # Exception mode should strongly favor exception_handling_worker over normal_worker + self.assertIn("exception_handling_worker", exception_mode_output) + self.assertNotIn("normal_worker", exception_mode_output) + + # Wall-clock mode should capture both types of work + self.assertIn("exception_handling_worker", wall_mode_output) + self.assertIn("normal_worker", wall_mode_output) diff --git a/Misc/NEWS.d/next/Library/2025-12-11-04-18-49.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Library/2025-12-11-04-18-49.gh-issue-138122.m3EF9E.rst new file mode 100644 index 00000000000000..9c471ee438df15 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-11-04-18-49.gh-issue-138122.m3EF9E.rst @@ -0,0 +1,3 @@ +Add ``--mode=exception`` to the sampling profiler to capture samples only from +threads with an active exception, useful for analyzing exception handling +overhead. Patch by Pablo Galindo. diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 0aa98349296b8a..fcb75b841b742e 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -109,10 +109,11 @@ typedef enum _WIN32_THREADSTATE { #define MAX_TLBC_SIZE 2048 /* Thread status flags */ -#define THREAD_STATUS_HAS_GIL (1 << 0) -#define THREAD_STATUS_ON_CPU (1 << 1) -#define THREAD_STATUS_UNKNOWN (1 << 2) -#define THREAD_STATUS_GIL_REQUESTED (1 << 3) +#define THREAD_STATUS_HAS_GIL (1 << 0) +#define THREAD_STATUS_ON_CPU (1 << 1) +#define THREAD_STATUS_UNKNOWN (1 << 2) +#define THREAD_STATUS_GIL_REQUESTED (1 << 3) +#define THREAD_STATUS_HAS_EXCEPTION (1 << 4) /* Exception cause macro */ #define set_exception_cause(unwinder, exc_type, message) \ @@ -209,7 +210,8 @@ enum _ProfilingMode { PROFILING_MODE_WALL = 0, PROFILING_MODE_CPU = 1, PROFILING_MODE_GIL = 2, - PROFILING_MODE_ALL = 3 + PROFILING_MODE_ALL = 3, + PROFILING_MODE_EXCEPTION = 4 }; typedef struct { diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index 9b05b911658190..a194d88c3c3ca0 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -568,7 +568,8 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self gc_frame); if (!frame_info) { // Check if this was an intentional skip due to mode-based filtering - if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) { + if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL || + self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) { // Thread was skipped due to mode filtering, continue to next thread continue; } @@ -1068,6 +1069,9 @@ _remote_debugging_exec(PyObject *m) if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) { return -1; } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_EXCEPTION", THREAD_STATUS_HAS_EXCEPTION) < 0) { + return -1; + } if (RemoteDebugging_InitState(st) < 0) { return -1; diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index f564e3a7256fa7..81c13ea48e3c49 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -344,6 +344,33 @@ unwind_stack_for_thread( gil_requested = 0; } + // Check exception state (both raised and handled exceptions) + int has_exception = 0; + + // Check current_exception (exception being raised/propagated) + uintptr_t current_exception = GET_MEMBER(uintptr_t, ts, + unwinder->debug_offsets.thread_state.current_exception); + if (current_exception != 0) { + has_exception = 1; + } + + // Check exc_state.exc_value (exception being handled in except block) + // exc_state is embedded in PyThreadState, so we read it directly from + // the thread state buffer. This catches most cases; nested exception + // handlers where exc_info points elsewhere are rare. + if (!has_exception) { + uintptr_t exc_value = GET_MEMBER(uintptr_t, ts, + unwinder->debug_offsets.thread_state.exc_state + + unwinder->debug_offsets.err_stackitem.exc_value); + if (exc_value != 0) { + has_exception = 1; + } + } + + if (has_exception) { + status_flags |= THREAD_STATUS_HAS_EXCEPTION; + } + // Check CPU status long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); @@ -368,6 +395,9 @@ unwind_stack_for_thread( } else if (unwinder->mode == PROFILING_MODE_GIL) { // Skip if doesn't have GIL should_skip = !(status_flags & THREAD_STATUS_HAS_GIL); + } else if (unwinder->mode == PROFILING_MODE_EXCEPTION) { + // Skip if thread doesn't have an exception active + should_skip = !(status_flags & THREAD_STATUS_HAS_EXCEPTION); } // PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip }