From 428b62f1a7cecc78689cbbdc92dd0a78a197b39f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 17 Dec 2025 14:59:56 +0000 Subject: [PATCH] gh-91048: Refactor common data into context object in Modukes/_remote_debugging --- Modules/_remote_debugging/_remote_debugging.h | 53 +++-- Modules/_remote_debugging/code_objects.c | 29 +-- Modules/_remote_debugging/frame_cache.c | 8 + Modules/_remote_debugging/frames.c | 220 ++++++++---------- Modules/_remote_debugging/threads.c | 26 ++- 5 files changed, 172 insertions(+), 164 deletions(-) diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 663479f235af09..cc8aeedba9c6c0 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -279,6 +279,35 @@ typedef struct { size_t count; } StackChunkList; +/* + * Context for frame chain traversal operations. + */ +typedef struct { + /* Inputs */ + uintptr_t frame_addr; // Starting frame address + uintptr_t base_frame_addr; // Sentinel at bottom (for validation) + uintptr_t gc_frame; // GC frame address (0 if not tracking) + uintptr_t last_profiled_frame; // Last cached frame (0 if no cache) + StackChunkList *chunks; // Pre-copied stack chunks + + /* Outputs */ + PyObject *frame_info; // List to append FrameInfo objects + uintptr_t *frame_addrs; // Array of visited frame addresses + Py_ssize_t num_addrs; // Count of addresses collected + Py_ssize_t max_addrs; // Capacity of frame_addrs array + uintptr_t last_frame_visited; // Last frame address visited + int stopped_at_cached_frame; // Whether we stopped at cached frame +} FrameWalkContext; + +/* + * Context for code object parsing. + */ +typedef struct { + uintptr_t code_addr; // Code object address in remote process + uintptr_t instruction_pointer; // Current instruction pointer + int32_t tlbc_index; // Thread-local bytecode index (free-threading) +} CodeObjectContext; + /* Function pointer types for iteration callbacks */ typedef int (*thread_processor_func)( RemoteUnwinderObject *unwinder, @@ -343,10 +372,7 @@ extern long read_py_long(RemoteUnwinderObject *unwinder, uintptr_t address); extern int parse_code_object( RemoteUnwinderObject *unwinder, PyObject **result, - uintptr_t address, - uintptr_t instruction_pointer, - uintptr_t *previous_frame, - int32_t tlbc_index + const CodeObjectContext *ctx ); extern PyObject *make_location_info( @@ -420,17 +446,7 @@ extern void *find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr); extern int process_frame_chain( RemoteUnwinderObject *unwinder, - uintptr_t initial_frame_addr, - StackChunkList *chunks, - PyObject *frame_info, - uintptr_t base_frame_addr, - uintptr_t gc_frame, - uintptr_t last_profiled_frame, - int *stopped_at_cached_frame, - uintptr_t *frame_addrs, - Py_ssize_t *num_addrs, - Py_ssize_t max_addrs, - uintptr_t *out_last_frame_addr + FrameWalkContext *ctx ); /* Frame cache functions */ @@ -460,12 +476,7 @@ extern int frame_cache_store( extern int collect_frames_with_cache( RemoteUnwinderObject *unwinder, - uintptr_t frame_addr, - StackChunkList *chunks, - PyObject *frame_info, - uintptr_t base_frame_addr, - uintptr_t gc_frame, - uintptr_t last_profiled_frame, + FrameWalkContext *ctx, uint64_t thread_id); /* ============================================================================ diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index 98fe74e8cb6331..ca6ffe7a00af60 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -76,6 +76,7 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit"); return 0; // Invalid size } + assert(tlbc_size > 0 && tlbc_size <= MAX_TLBC_SIZE); // Allocate and read the entire TLBC array size_t array_data_size = tlbc_size * sizeof(void*); @@ -156,8 +157,11 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L const uint8_t* ptr = (const uint8_t*)(linetable); uintptr_t addr = 0; int computed_line = firstlineno; // Running accumulator, separate from output + const size_t MAX_LINETABLE_ENTRIES = 65536; + size_t entry_count = 0; - while (*ptr != '\0') { + while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) { + entry_count++; uint8_t first_byte = *(ptr++); uint8_t code = (first_byte >> 3) & 15; size_t length = (first_byte & 7) + 1; @@ -277,12 +281,9 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *locati int parse_code_object(RemoteUnwinderObject *unwinder, PyObject **result, - uintptr_t address, - uintptr_t instruction_pointer, - uintptr_t *previous_frame, - int32_t tlbc_index) + const CodeObjectContext *ctx) { - void *key = (void *)address; + void *key = (void *)ctx->code_addr; CachedCodeMetadata *meta = NULL; PyObject *func = NULL; PyObject *file = NULL; @@ -291,9 +292,9 @@ parse_code_object(RemoteUnwinderObject *unwinder, #ifdef Py_GIL_DISABLED // In free threading builds, code object addresses might have the low bit set // as a flag, so we need to mask it off to get the real address - uintptr_t real_address = address & (~1); + uintptr_t real_address = ctx->code_addr & (~1); #else - uintptr_t real_address = address; + uintptr_t real_address = ctx->code_addr; #endif if (unwinder && unwinder->code_object_cache != NULL) { @@ -360,12 +361,12 @@ parse_code_object(RemoteUnwinderObject *unwinder, linetable = NULL; } - uintptr_t ip = instruction_pointer; + uintptr_t ip = ctx->instruction_pointer; ptrdiff_t addrq; #ifdef Py_GIL_DISABLED // Handle thread-local bytecode (TLBC) in free threading builds - if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { + if (ctx->tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { // No TLBC or no unwinder - use main bytecode directly addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; goto done_tlbc; @@ -383,10 +384,12 @@ parse_code_object(RemoteUnwinderObject *unwinder, tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); } - if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { + if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) { + assert(ctx->tlbc_index >= 0); + assert(tlbc_entry->tlbc_array_size > 0); // Use cached TLBC data uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); - uintptr_t tlbc_bytecode_addr = entries[tlbc_index]; + uintptr_t tlbc_bytecode_addr = entries[ctx->tlbc_index]; if (tlbc_bytecode_addr != 0) { // Calculate offset from TLBC bytecode @@ -401,8 +404,6 @@ parse_code_object(RemoteUnwinderObject *unwinder, done_tlbc: #else // Non-free-threaded build, always use the main bytecode - (void)tlbc_index; // Suppress unused parameter warning - (void)unwinder; // Suppress unused parameter warning addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; #endif ; // Empty statement to avoid C23 extension warning diff --git a/Modules/_remote_debugging/frame_cache.c b/Modules/_remote_debugging/frame_cache.c index ab7891445e07a3..e94f4d3d81caea 100644 --- a/Modules/_remote_debugging/frame_cache.c +++ b/Modules/_remote_debugging/frame_cache.c @@ -44,7 +44,9 @@ frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id) return NULL; } for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) { + assert(i >= 0 && i < FRAME_CACHE_MAX_THREADS); if (unwinder->frame_cache[i].thread_id == thread_id) { + assert(unwinder->frame_cache[i].num_addrs <= FRAME_CACHE_MAX_FRAMES); return &unwinder->frame_cache[i]; } } @@ -154,6 +156,8 @@ frame_cache_lookup_and_extend( return 0; } + assert(entry->num_addrs >= 0 && entry->num_addrs <= FRAME_CACHE_MAX_FRAMES); + // Find the index where last_profiled_frame matches Py_ssize_t start_idx = -1; for (Py_ssize_t i = 0; i < entry->num_addrs; i++) { @@ -166,6 +170,7 @@ frame_cache_lookup_and_extend( if (start_idx < 0) { return 0; // Not found } + assert(start_idx < entry->num_addrs); Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list); @@ -225,6 +230,7 @@ frame_cache_store( if (num_addrs > FRAME_CACHE_MAX_FRAMES) { num_addrs = FRAME_CACHE_MAX_FRAMES; } + assert(num_addrs >= 0 && num_addrs <= FRAME_CACHE_MAX_FRAMES); FrameCacheEntry *entry = frame_cache_alloc_slot(unwinder, thread_id); if (!entry) { @@ -245,6 +251,8 @@ frame_cache_store( entry->thread_id = thread_id; memcpy(entry->addrs, addrs, num_addrs * sizeof(uintptr_t)); entry->num_addrs = num_addrs; + assert(entry->num_addrs == num_addrs); + assert(entry->thread_id == thread_id); return 1; } diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index 47e34e9f945cbd..8aebd40671ccbf 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -88,7 +88,8 @@ copy_stack_chunks(RemoteUnwinderObject *unwinder, return -1; } - while (chunk_addr != 0) { + const size_t MAX_STACK_CHUNKS = 4096; + while (chunk_addr != 0 && count < MAX_STACK_CHUNKS) { // Grow array if needed if (count >= max_chunks) { max_chunks *= 2; @@ -128,6 +129,7 @@ void * find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) { for (size_t i = 0; i < chunks->count; ++i) { + assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data)); uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); @@ -209,7 +211,13 @@ parse_frame_object( #endif *address_of_code_object = code_object; - return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); + + CodeObjectContext code_ctx = { + .code_addr = code_object, + .instruction_pointer = instruction_pointer, + .tlbc_index = tlbc_index, + }; + return parse_code_object(unwinder, result, &code_ctx); } int @@ -246,7 +254,12 @@ parse_frame_from_chunks( } #endif - return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); + CodeObjectContext code_ctx = { + .code_addr = code_object, + .instruction_pointer = instruction_pointer, + .tlbc_index = tlbc_index, + }; + return parse_code_object(unwinder, result, &code_ctx); } /* ============================================================================ @@ -256,105 +269,80 @@ parse_frame_from_chunks( int process_frame_chain( RemoteUnwinderObject *unwinder, - uintptr_t initial_frame_addr, - StackChunkList *chunks, - PyObject *frame_info, - uintptr_t base_frame_addr, - uintptr_t gc_frame, - uintptr_t last_profiled_frame, - int *stopped_at_cached_frame, - uintptr_t *frame_addrs, // optional: C array to receive frame addresses - Py_ssize_t *num_addrs, // in/out: current count / updated count - Py_ssize_t max_addrs, // max capacity of frame_addrs array - uintptr_t *out_last_frame_addr) // optional: receives last frame address visited + FrameWalkContext *ctx) { - uintptr_t frame_addr = initial_frame_addr; + uintptr_t frame_addr = ctx->frame_addr; uintptr_t prev_frame_addr = 0; - uintptr_t last_frame_addr = 0; // Track last frame visited for validation + uintptr_t last_frame_addr = 0; const size_t MAX_FRAMES = 1024 + 512; size_t frame_count = 0; + assert(MAX_FRAMES > 0 && MAX_FRAMES < 10000); - // Initialize output parameters - if (stopped_at_cached_frame) { - *stopped_at_cached_frame = 0; - } - if (out_last_frame_addr) { - *out_last_frame_addr = 0; - } + ctx->stopped_at_cached_frame = 0; + ctx->last_frame_visited = 0; - // Quick check: if current_frame == last_profiled_frame, entire stack is unchanged - if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) { - if (stopped_at_cached_frame) { - *stopped_at_cached_frame = 1; - } + if (ctx->last_profiled_frame != 0 && ctx->frame_addr == ctx->last_profiled_frame) { + ctx->stopped_at_cached_frame = 1; return 0; } while ((void*)frame_addr != NULL) { - // Check if we've reached the cached frame - if so, stop here - if (last_profiled_frame != 0 && frame_addr == last_profiled_frame) { - if (stopped_at_cached_frame) { - *stopped_at_cached_frame = 1; - } + if (ctx->last_profiled_frame != 0 && frame_addr == ctx->last_profiled_frame) { + ctx->stopped_at_cached_frame = 1; break; } PyObject *frame = NULL; uintptr_t next_frame_addr = 0; uintptr_t stackpointer = 0; - last_frame_addr = frame_addr; // Remember this frame address + last_frame_addr = frame_addr; if (++frame_count > MAX_FRAMES) { PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded"); return -1; } + assert(frame_count <= MAX_FRAMES); - if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) { + if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, ctx->chunks) < 0) { PyErr_Clear(); uintptr_t address_of_code_object = 0; - if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) { + if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object, &next_frame_addr) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain"); return -1; } } - if (frame == NULL && PyList_GET_SIZE(frame_info) == 0) { + if (frame == NULL && PyList_GET_SIZE(ctx->frame_info) == 0) { const char *e = "Failed to parse initial frame in chain"; PyErr_SetString(PyExc_RuntimeError, e); return -1; } PyObject *extra_frame = NULL; - // This frame kicked off the current GC collection: - if (unwinder->gc && frame_addr == gc_frame) { + if (unwinder->gc && frame_addr == ctx->gc_frame) { _Py_DECLARE_STR(gc, ""); extra_frame = &_Py_STR(gc); } - // Otherwise, check for native frames to insert: else if (unwinder->native && - // We've reached an interpreter trampoline frame: frame == NULL && - // Bottommost frame is always native, so skip that one: next_frame_addr && - // Only suppress native frames if GC tracking is enabled and the next frame will be a GC frame: - !(unwinder->gc && next_frame_addr == gc_frame)) + !(unwinder->gc && next_frame_addr == ctx->gc_frame)) { _Py_DECLARE_STR(native, ""); extra_frame = &_Py_STR(native); } if (extra_frame) { - // Use "~" as file, None as location (synthetic frame), None as opcode PyObject *extra_frame_info = make_frame_info( unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None); if (extra_frame_info == NULL) { return -1; } - if (PyList_Append(frame_info, extra_frame_info) < 0) { + if (PyList_Append(ctx->frame_info, extra_frame_info) < 0) { Py_DECREF(extra_frame_info); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append extra frame"); return -1; } - // Extra frames use 0 as address (they're synthetic) - if (frame_addrs && *num_addrs < max_addrs) { - frame_addrs[(*num_addrs)++] = 0; + if (ctx->frame_addrs && ctx->num_addrs < ctx->max_addrs) { + assert(ctx->num_addrs >= 0); + ctx->frame_addrs[ctx->num_addrs++] = 0; } Py_DECREF(extra_frame_info); } @@ -367,14 +355,14 @@ process_frame_chain( return -1; } - if (PyList_Append(frame_info, frame) < 0) { + if (PyList_Append(ctx->frame_info, frame) < 0) { Py_DECREF(frame); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame"); return -1; } - // Track the address for this frame - if (frame_addrs && *num_addrs < max_addrs) { - frame_addrs[(*num_addrs)++] = frame_addr; + if (ctx->frame_addrs && ctx->num_addrs < ctx->max_addrs) { + assert(ctx->num_addrs >= 0); + ctx->frame_addrs[ctx->num_addrs++] = frame_addr; } Py_DECREF(frame); } @@ -383,21 +371,14 @@ process_frame_chain( frame_addr = next_frame_addr; } - // Validate we reached the base frame (sentinel at bottom of stack) - // Only validate if we walked the full chain (didn't stop at cached frame) - // and base_frame_addr is provided (non-zero) - int stopped_early = stopped_at_cached_frame && *stopped_at_cached_frame; - if (!stopped_early && base_frame_addr != 0 && last_frame_addr != base_frame_addr) { + if (!ctx->stopped_at_cached_frame && ctx->base_frame_addr != 0 && last_frame_addr != ctx->base_frame_addr) { PyErr_Format(PyExc_RuntimeError, "Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)", - base_frame_addr, last_frame_addr); + ctx->base_frame_addr, last_frame_addr); return -1; } - // Set output parameter for caller (needed for cache validation) - if (out_last_frame_addr) { - *out_last_frame_addr = last_frame_addr; - } + ctx->last_frame_visited = last_frame_addr; return 0; } @@ -410,8 +391,11 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder) { uintptr_t current_interp = unwinder->interpreter_addr; uintptr_t zero = 0; + const size_t MAX_INTERPRETERS = 256; + size_t interp_count = 0; - while (current_interp != 0) { + while (current_interp != 0 && interp_count < MAX_INTERPRETERS) { + interp_count++; // Get first thread in this interpreter uintptr_t tstate_addr; if (_Py_RemoteDebug_PagedReadRemoteMemory( @@ -425,7 +409,10 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder) } // Iterate all threads in this interpreter - while (tstate_addr != 0) { + const size_t MAX_THREADS_PER_INTERP = 8192; + size_t thread_count = 0; + while (tstate_addr != 0 && thread_count < MAX_THREADS_PER_INTERP) { + thread_count++; // Clear last_profiled_frame uintptr_t lpf_addr = tstate_addr + unwinder->debug_offsets.thread_state.last_profiled_frame; if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr, @@ -468,16 +455,13 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder) static int try_full_cache_hit( RemoteUnwinderObject *unwinder, - uintptr_t frame_addr, - uintptr_t last_profiled_frame, - uint64_t thread_id, - PyObject *frame_info) + const FrameWalkContext *ctx, + uint64_t thread_id) { - if (!unwinder->frame_cache || last_profiled_frame == 0) { + if (!unwinder->frame_cache || ctx->last_profiled_frame == 0) { return 0; } - // Full hit only if current frame == last profiled frame - if (frame_addr != last_profiled_frame) { + if (ctx->frame_addr != ctx->last_profiled_frame) { return 0; } @@ -486,22 +470,19 @@ try_full_cache_hit( return 0; } - // Verify first address matches (sanity check) - if (entry->num_addrs == 0 || entry->addrs[0] != frame_addr) { + if (entry->num_addrs == 0 || entry->addrs[0] != ctx->frame_addr) { return 0; } - // Always read the current frame from memory to get updated line number PyObject *current_frame = NULL; uintptr_t code_object_addr = 0; uintptr_t previous_frame = 0; - int parse_result = parse_frame_object(unwinder, ¤t_frame, frame_addr, + int parse_result = parse_frame_object(unwinder, ¤t_frame, ctx->frame_addr, &code_object_addr, &previous_frame); if (parse_result < 0) { return -1; } - // Get cached parent frames first (before modifying frame_info) Py_ssize_t cached_size = PyList_GET_SIZE(entry->frame_list); PyObject *parent_slice = NULL; if (cached_size > 1) { @@ -512,9 +493,8 @@ try_full_cache_hit( } } - // Now safe to modify frame_info - add current frame if valid if (current_frame != NULL) { - if (PyList_Append(frame_info, current_frame) < 0) { + if (PyList_Append(ctx->frame_info, current_frame) < 0) { Py_DECREF(current_frame); Py_XDECREF(parent_slice); return -1; @@ -523,10 +503,9 @@ try_full_cache_hit( STATS_ADD(unwinder, frames_read_from_memory, 1); } - // Extend with cached parent frames if (parent_slice) { - Py_ssize_t cur_size = PyList_GET_SIZE(frame_info); - int result = PyList_SetSlice(frame_info, cur_size, cur_size, parent_slice); + Py_ssize_t cur_size = PyList_GET_SIZE(ctx->frame_info); + int result = PyList_SetSlice(ctx->frame_info, cur_size, cur_size, parent_slice); Py_DECREF(parent_slice); if (result < 0) { return -1; @@ -543,72 +522,67 @@ try_full_cache_hit( int collect_frames_with_cache( RemoteUnwinderObject *unwinder, - uintptr_t frame_addr, - StackChunkList *chunks, - PyObject *frame_info, - uintptr_t base_frame_addr, - uintptr_t gc_frame, - uintptr_t last_profiled_frame, + FrameWalkContext *ctx, uint64_t thread_id) { - // Fast path: check for full cache hit first (no allocations needed) - int full_hit = try_full_cache_hit(unwinder, frame_addr, last_profiled_frame, - thread_id, frame_info); + int full_hit = try_full_cache_hit(unwinder, ctx, thread_id); if (full_hit != 0) { - return full_hit < 0 ? -1 : 0; // Either error or success + return full_hit < 0 ? -1 : 0; } - uintptr_t addrs[FRAME_CACHE_MAX_FRAMES]; - Py_ssize_t num_addrs = 0; - Py_ssize_t frames_before = PyList_GET_SIZE(frame_info); - uintptr_t last_frame_visited = 0; + Py_ssize_t frames_before = PyList_GET_SIZE(ctx->frame_info); - int stopped_at_cached = 0; - if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame, - last_profiled_frame, &stopped_at_cached, - addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES, - &last_frame_visited) < 0) { + if (process_frame_chain(unwinder, ctx) < 0) { return -1; } - // Track frames read from memory (frames added by process_frame_chain) - STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before); + STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(ctx->frame_info) - frames_before); - // If stopped at cached frame, extend with cached continuation (both frames and addresses) - if (stopped_at_cached) { - Py_ssize_t frames_before_cache = PyList_GET_SIZE(frame_info); - int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id, last_profiled_frame, - frame_info, addrs, &num_addrs, - FRAME_CACHE_MAX_FRAMES); + if (ctx->stopped_at_cached_frame) { + Py_ssize_t frames_before_cache = PyList_GET_SIZE(ctx->frame_info); + int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id, ctx->last_profiled_frame, + ctx->frame_info, ctx->frame_addrs, &ctx->num_addrs, + ctx->max_addrs); if (cache_result < 0) { return -1; } if (cache_result == 0) { - // Cache miss - continue walking from last_profiled_frame to get the rest STATS_INC(unwinder, frame_cache_misses); - Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info); - if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame, - 0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES, - &last_frame_visited) < 0) { + Py_ssize_t frames_before_walk = PyList_GET_SIZE(ctx->frame_info); + + FrameWalkContext continue_ctx = { + .frame_addr = ctx->last_profiled_frame, + .base_frame_addr = ctx->base_frame_addr, + .gc_frame = ctx->gc_frame, + .last_profiled_frame = 0, + .chunks = ctx->chunks, + .frame_info = ctx->frame_info, + .frame_addrs = ctx->frame_addrs, + .num_addrs = ctx->num_addrs, + .max_addrs = ctx->max_addrs, + }; + if (process_frame_chain(unwinder, &continue_ctx) < 0) { return -1; } - STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk); + ctx->num_addrs = continue_ctx.num_addrs; + ctx->last_frame_visited = continue_ctx.last_frame_visited; + + STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(ctx->frame_info) - frames_before_walk); } else { - // Partial cache hit - cache was validated when stored, so we trust it + // Partial cache hit - cached stack was validated as complete when stored, + // so set last_frame_visited to base_frame_addr for validation in frame_cache_store + ctx->last_frame_visited = ctx->base_frame_addr; STATS_INC(unwinder, frame_cache_partial_hits); - STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache); + STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(ctx->frame_info) - frames_before_cache); } } else { - if (last_profiled_frame == 0) { - // No cache involvement (no last_profiled_frame or cache disabled) + if (ctx->last_profiled_frame == 0) { STATS_INC(unwinder, frame_cache_misses); } } - // Store in cache - frame_cache_store validates internally that we have a - // complete stack (reached base_frame_addr) before actually storing - if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs, - base_frame_addr, last_frame_visited) < 0) { + if (frame_cache_store(unwinder, thread_id, ctx->frame_info, ctx->frame_addrs, ctx->num_addrs, + ctx->base_frame_addr, ctx->last_frame_visited) < 0) { return -1; } diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index 6db774ecfc269e..3a5b8adb3f4dda 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -23,6 +23,8 @@ iterate_threads( ) { uintptr_t thread_state_addr; unsigned long tid = 0; + const size_t MAX_THREADS = 8192; + size_t thread_count = 0; if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, @@ -34,7 +36,8 @@ iterate_threads( return -1; } - while (thread_state_addr != 0) { + while (thread_state_addr != 0 && thread_count < MAX_THREADS) { + thread_count++; if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.native_thread_id, @@ -425,12 +428,24 @@ unwind_stack_for_thread( } } + uintptr_t addrs[FRAME_CACHE_MAX_FRAMES]; + FrameWalkContext ctx = { + .frame_addr = frame_addr, + .base_frame_addr = base_frame_addr, + .gc_frame = gc_frame, + .chunks = &chunks, + .frame_info = frame_info, + .frame_addrs = addrs, + .num_addrs = 0, + .max_addrs = FRAME_CACHE_MAX_FRAMES, + }; + assert(ctx.max_addrs == FRAME_CACHE_MAX_FRAMES); + if (unwinder->cache_frames) { // Use cache to avoid re-reading unchanged parent frames - uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts, + ctx.last_profiled_frame = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.last_profiled_frame); - if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info, - base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) { + if (collect_frames_with_cache(unwinder, &ctx, tid) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames"); goto error; } @@ -443,8 +458,7 @@ unwind_stack_for_thread( } } else { // No caching - process entire frame chain with base_frame validation - if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, - base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) { + if (process_frame_chain(unwinder, &ctx) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; }