From 8dc7a48787ed0eb514c432a363df404670e58b3f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 26 Dec 2025 14:49:00 +0000 Subject: [PATCH 1/3] gh-140739: Fix crashes from corrupted remote memory The remote debugging module reads memory from another Python process which can be modified or freed at any time due to race conditions. When garbage data is read, various code paths could cause SIGSEGV crashes in the profiler process itself rather than gracefully rejecting the sample. Add bounds checking and validation for data read from remote memory: linetable parsing now checks buffer bounds, PyLong reading validates digit count, stack chunk sizes are bounded, set iteration limits table size, task pointer arithmetic checks for underflow, TLBC index is validated against array bounds, and thread list iteration detects cycles. All cases now reject the sample with an exception instead of crashing or looping forever. --- Modules/_remote_debugging/_remote_debugging.h | 6 + Modules/_remote_debugging/asyncio.c | 27 +++- Modules/_remote_debugging/code_objects.c | 117 ++++++++++++++---- Modules/_remote_debugging/frames.c | 15 ++- Modules/_remote_debugging/module.c | 9 ++ Modules/_remote_debugging/object_reading.c | 18 ++- 6 files changed, 156 insertions(+), 36 deletions(-) diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 4119d916be7a63..78add74423b608 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -140,6 +140,11 @@ typedef enum _WIN32_THREADSTATE { #define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state) #define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState) +/* Maximum sizes for validation to prevent buffer overflows from corrupted data */ +#define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */ +#define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */ +#define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */ + #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -451,6 +456,7 @@ extern PyObject *make_frame_info( extern bool parse_linetable( const uintptr_t addrq, const char* linetable, + Py_ssize_t linetable_size, int firstlineno, LocationInfo* info ); diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c index 7f91f16e3a2ce6..2f62b289bad37f 100644 --- a/Modules/_remote_debugging/asyncio.c +++ b/Modules/_remote_debugging/asyncio.c @@ -112,9 +112,17 @@ iterate_set_entries( } Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); - Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; + Py_ssize_t mask = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask); uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + // Validate mask and num_els to prevent huge loop iterations from garbage data + if (mask < 0 || mask >= MAX_SET_TABLE_SIZE || num_els < 0 || num_els > mask + 1) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid set object (corrupted remote memory)"); + return -1; + } + Py_ssize_t set_len = mask + 1; + Py_ssize_t i = 0; Py_ssize_t els = 0; while (i < set_len && els < num_els) { @@ -812,25 +820,32 @@ append_awaited_by_for_thread( return -1; } - if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + uintptr_t next_node = GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next); + if (next_node == 0) { PyErr_SetString(PyExc_RuntimeError, "Invalid linked list structure reading remote memory"); set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); return -1; } - uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) - - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; + // Validate next_node to prevent underflow when computing task_addr + uintptr_t task_node_offset = (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; + if (next_node < task_node_offset) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid task node pointer (corrupted remote memory)"); + return -1; + } + uintptr_t task_addr = next_node - task_node_offset; if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); return -1; } - // Read next node + // Read next node (use already-validated next_node) if (_Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, - (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + next_node, sizeof(task_node), task_node) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index ca6ffe7a00af60..9b7b4dc22b873b 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t * LINE TABLE PARSING FUNCTIONS * ============================================================================ */ +// Inline helper for bounds-checked byte reading (no function call overhead) +static inline int +read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out) +{ + if (*ptr >= end) { + return -1; + } + *out = *(*ptr)++; + return 0; +} + static int -scan_varint(const uint8_t **ptr) +scan_varint(const uint8_t **ptr, const uint8_t *end) { - unsigned int read = **ptr; - *ptr = *ptr + 1; + uint8_t read; + if (read_byte(ptr, end, &read) < 0) { + return -1; + } unsigned int val = read & 63; unsigned int shift = 0; while (read & 64) { - read = **ptr; - *ptr = *ptr + 1; + if (read_byte(ptr, end, &read) < 0) { + return -1; + } shift += 6; + // Prevent infinite loop on malformed data (shift overflow) + if (shift > 28) { + return -1; + } val |= (read & 63) << shift; } - return val; + return (int)val; } static int -scan_signed_varint(const uint8_t **ptr) +scan_signed_varint(const uint8_t **ptr, const uint8_t *end) { - unsigned int uval = scan_varint(ptr); + int uval = scan_varint(ptr, end); + if (uval < 0) { + return INT_MIN; // Error sentinel (valid signed varints won't be INT_MIN) + } if (uval & 1) { - return -(int)(uval >> 1); + return -(int)((unsigned int)uval >> 1); } else { - return uval >> 1; + return (int)((unsigned int)uval >> 1); } } bool -parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) +parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size, + int firstlineno, LocationInfo* info) { + // Reject garbage: zero or negative size + if (linetable_size <= 0) { + return false; + } + const uint8_t* ptr = (const uint8_t*)(linetable); + const uint8_t* end = ptr + linetable_size; uintptr_t addr = 0; int computed_line = firstlineno; // Running accumulator, separate from output + int val; // Temporary for varint results + uint8_t byte; // Temporary for byte reads const size_t MAX_LINETABLE_ENTRIES = 65536; size_t entry_count = 0; - while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) { + while (ptr < end && *ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) { entry_count++; uint8_t first_byte = *(ptr++); uint8_t code = (first_byte >> 3) & 15; @@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L info->column = info->end_column = -1; break; case PY_CODE_LOCATION_INFO_LONG: - computed_line += scan_signed_varint(&ptr); + val = scan_signed_varint(&ptr, end); + if (val == INT_MIN) { + return false; + } + computed_line += val; info->lineno = computed_line; - info->end_lineno = computed_line + scan_varint(&ptr); - info->column = scan_varint(&ptr) - 1; - info->end_column = scan_varint(&ptr) - 1; + val = scan_varint(&ptr, end); + if (val < 0) { + return false; + } + info->end_lineno = computed_line + val; + val = scan_varint(&ptr, end); + if (val < 0) { + return false; + } + info->column = val - 1; + val = scan_varint(&ptr, end); + if (val < 0) { + return false; + } + info->end_column = val - 1; break; case PY_CODE_LOCATION_INFO_NO_COLUMNS: - computed_line += scan_signed_varint(&ptr); + val = scan_signed_varint(&ptr, end); + if (val == INT_MIN) { + return false; + } + computed_line += val; info->lineno = info->end_lineno = computed_line; info->column = info->end_column = -1; break; @@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L case PY_CODE_LOCATION_INFO_ONE_LINE2: computed_line += code - 10; info->lineno = info->end_lineno = computed_line; - info->column = *(ptr++); - info->end_column = *(ptr++); + if (read_byte(&ptr, end, &byte) < 0) { + return false; + } + info->column = byte; + if (read_byte(&ptr, end, &byte) < 0) { + return false; + } + info->end_column = byte; break; default: { - uint8_t second_byte = *(ptr++); - if ((second_byte & 128) != 0) { + if (read_byte(&ptr, end, &byte) < 0) { + return false; + } + if ((byte & 128) != 0) { return false; } info->lineno = info->end_lineno = computed_line; - info->column = code << 3 | (second_byte >> 4); - info->end_column = info->column + (second_byte & 15); + info->column = code << 3 | (byte >> 4); + info->end_column = info->column + (byte & 15); break; } } @@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder, tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); } - if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) { - assert(ctx->tlbc_index >= 0); + // Validate tlbc_index and check TLBC cache + if (tlbc_entry) { + // Validate index bounds (also catches negative values since tlbc_index is signed) + if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid tlbc_index (corrupted remote memory)"); + goto error; + } assert(tlbc_entry->tlbc_array_size > 0); // Use cached TLBC data uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); @@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder, } } - // Fall back to main bytecode + // Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0) addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; done_tlbc: @@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder, ; // Empty statement to avoid C23 extension warning LocationInfo info = {0}; bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + PyBytes_GET_SIZE(meta->linetable), meta->first_lineno, &info); if (!ok) { info.lineno = -1; diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index d9fece6459875a..02c48205b85a37 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -45,6 +45,15 @@ process_single_stack_chunk( // Check actual size and reread if necessary size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); if (actual_size != current_size) { + // Validate size: reject garbage (too small or unreasonably large) + // Size must be at least enough for the header and reasonably bounded + if (actual_size <= offsetof(_PyStackChunk, data) || actual_size > MAX_STACK_CHUNK_SIZE) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid stack chunk size (corrupted remote memory)"); + return -1; + } + this_chunk = PyMem_RawRealloc(this_chunk, actual_size); if (!this_chunk) { PyErr_NoMemory(); @@ -129,7 +138,11 @@ void * find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) { for (size_t i = 0; i < chunks->count; ++i) { - assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data)); + // Validate size: reject garbage that would cause underflow + if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) { + // Skip this chunk - corrupted size from remote memory + continue; + } uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index e14a23357fb400..737787a331f948 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -584,6 +584,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self } while (current_tstate != 0) { + uintptr_t prev_tstate = current_tstate; PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate, gil_holder_tstate, gc_frame); @@ -591,6 +592,14 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self // Check if this was an intentional skip due to mode-based filtering if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL || self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) { + // Detect cycle: if current_tstate didn't advance, we have corrupted data + if (current_tstate == prev_tstate) { + Py_DECREF(interpreter_threads); + set_exception_cause(self, PyExc_RuntimeError, + "Thread list cycle detected (corrupted remote memory)"); + Py_CLEAR(result); + goto exit; + } // Thread was skipped due to mode filtering, continue to next thread continue; } diff --git a/Modules/_remote_debugging/object_reading.c b/Modules/_remote_debugging/object_reading.c index 2f465ca0cac41d..447b7fd5926064 100644 --- a/Modules/_remote_debugging/object_reading.c +++ b/Modules/_remote_debugging/object_reading.c @@ -194,9 +194,21 @@ read_py_long( return 0; } - // If the long object has inline digits, use them directly + // Validate size: reject garbage (negative or unreasonably large) + if (size < 0 || size > MAX_LONG_DIGITS) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid PyLong size (corrupted remote memory)"); + return -1; + } + + // Calculate how many digits fit inline in our local buffer + Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit; + Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset; + Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit); + + // If the long object has inline digits that fit in our buffer, use them directly digit *digits; - if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + if (size <= max_inline_digits && size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { // For small integers, digits are inline in the long_value.ob_digit array digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); if (!digits) { @@ -204,7 +216,7 @@ read_py_long( set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); return -1; } - memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + memcpy(digits, long_obj + ob_digit_offset, size * sizeof(digit)); } else { // For larger integers, we need to read the digits separately digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); From d18b5227d9fdf3cc5347768c06a28e039a3f4006 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 26 Dec 2025 14:51:57 +0000 Subject: [PATCH 2/3] Add NEWS entry --- .../2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst | 2 ++ Modules/_remote_debugging/asyncio.c | 12 +++--------- 2 files changed, 5 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst diff --git a/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst new file mode 100644 index 00000000000000..711179c735790f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst @@ -0,0 +1,2 @@ +Fix several crashes due to reading invalid memory in the new Tachyon +sampling profiler. Patch by Pablo Galindo diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c index 2f62b289bad37f..3fcc939fd0e876 100644 --- a/Modules/_remote_debugging/asyncio.c +++ b/Modules/_remote_debugging/asyncio.c @@ -828,21 +828,15 @@ append_awaited_by_for_thread( return -1; } - // Validate next_node to prevent underflow when computing task_addr - uintptr_t task_node_offset = (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; - if (next_node < task_node_offset) { - set_exception_cause(unwinder, PyExc_RuntimeError, - "Invalid task node pointer (corrupted remote memory)"); - return -1; - } - uintptr_t task_addr = next_node - task_node_offset; + uintptr_t task_addr = next_node + - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); return -1; } - // Read next node (use already-validated next_node) + // Read next node if (_Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, next_node, From 05c8f1ec64433771a2dd5b67565b2a6b921cb98f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 26 Dec 2025 15:40:46 +0000 Subject: [PATCH 3/3] Update Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst Co-authored-by: Ken Jin --- .../next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst index 711179c735790f..ae93cbb51114f1 100644 --- a/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst +++ b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst @@ -1,2 +1,2 @@ Fix several crashes due to reading invalid memory in the new Tachyon -sampling profiler. Patch by Pablo Galindo +sampling profiler. Patch by Pablo Galindo.