diff --git a/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst new file mode 100644 index 00000000000000..ae93cbb51114f1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst @@ -0,0 +1,2 @@ +Fix several crashes due to reading invalid memory in the new Tachyon +sampling profiler. Patch by Pablo Galindo. diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 4119d916be7a63..78add74423b608 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -140,6 +140,11 @@ typedef enum _WIN32_THREADSTATE { #define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state) #define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState) +/* Maximum sizes for validation to prevent buffer overflows from corrupted data */ +#define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */ +#define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */ +#define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */ + #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -451,6 +456,7 @@ extern PyObject *make_frame_info( extern bool parse_linetable( const uintptr_t addrq, const char* linetable, + Py_ssize_t linetable_size, int firstlineno, LocationInfo* info ); diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c index 7f91f16e3a2ce6..3fcc939fd0e876 100644 --- a/Modules/_remote_debugging/asyncio.c +++ b/Modules/_remote_debugging/asyncio.c @@ -112,9 +112,17 @@ iterate_set_entries( } Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); - Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; + Py_ssize_t mask = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask); uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + // Validate mask and num_els to prevent huge loop iterations from garbage data + if (mask < 0 || mask >= MAX_SET_TABLE_SIZE || num_els < 0 || num_els > mask + 1) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid set object (corrupted remote memory)"); + return -1; + } + Py_ssize_t set_len = mask + 1; + Py_ssize_t i = 0; Py_ssize_t els = 0; while (i < set_len && els < num_els) { @@ -812,14 +820,15 @@ append_awaited_by_for_thread( return -1; } - if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + uintptr_t next_node = GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next); + if (next_node == 0) { PyErr_SetString(PyExc_RuntimeError, "Invalid linked list structure reading remote memory"); set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); return -1; } - uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) + uintptr_t task_addr = next_node - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { @@ -830,7 +839,7 @@ append_awaited_by_for_thread( // Read next node if (_Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, - (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + next_node, sizeof(task_node), task_node) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index ca6ffe7a00af60..9b7b4dc22b873b 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t * LINE TABLE PARSING FUNCTIONS * ============================================================================ */ +// Inline helper for bounds-checked byte reading (no function call overhead) +static inline int +read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out) +{ + if (*ptr >= end) { + return -1; + } + *out = *(*ptr)++; + return 0; +} + static int -scan_varint(const uint8_t **ptr) +scan_varint(const uint8_t **ptr, const uint8_t *end) { - unsigned int read = **ptr; - *ptr = *ptr + 1; + uint8_t read; + if (read_byte(ptr, end, &read) < 0) { + return -1; + } unsigned int val = read & 63; unsigned int shift = 0; while (read & 64) { - read = **ptr; - *ptr = *ptr + 1; + if (read_byte(ptr, end, &read) < 0) { + return -1; + } shift += 6; + // Prevent infinite loop on malformed data (shift overflow) + if (shift > 28) { + return -1; + } val |= (read & 63) << shift; } - return val; + return (int)val; } static int -scan_signed_varint(const uint8_t **ptr) +scan_signed_varint(const uint8_t **ptr, const uint8_t *end) { - unsigned int uval = scan_varint(ptr); + int uval = scan_varint(ptr, end); + if (uval < 0) { + return INT_MIN; // Error sentinel (valid signed varints won't be INT_MIN) + } if (uval & 1) { - return -(int)(uval >> 1); + return -(int)((unsigned int)uval >> 1); } else { - return uval >> 1; + return (int)((unsigned int)uval >> 1); } } bool -parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) +parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size, + int firstlineno, LocationInfo* info) { + // Reject garbage: zero or negative size + if (linetable_size <= 0) { + return false; + } + const uint8_t* ptr = (const uint8_t*)(linetable); + const uint8_t* end = ptr + linetable_size; uintptr_t addr = 0; int computed_line = firstlineno; // Running accumulator, separate from output + int val; // Temporary for varint results + uint8_t byte; // Temporary for byte reads const size_t MAX_LINETABLE_ENTRIES = 65536; size_t entry_count = 0; - while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) { + while (ptr < end && *ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) { entry_count++; uint8_t first_byte = *(ptr++); uint8_t code = (first_byte >> 3) & 15; @@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L info->column = info->end_column = -1; break; case PY_CODE_LOCATION_INFO_LONG: - computed_line += scan_signed_varint(&ptr); + val = scan_signed_varint(&ptr, end); + if (val == INT_MIN) { + return false; + } + computed_line += val; info->lineno = computed_line; - info->end_lineno = computed_line + scan_varint(&ptr); - info->column = scan_varint(&ptr) - 1; - info->end_column = scan_varint(&ptr) - 1; + val = scan_varint(&ptr, end); + if (val < 0) { + return false; + } + info->end_lineno = computed_line + val; + val = scan_varint(&ptr, end); + if (val < 0) { + return false; + } + info->column = val - 1; + val = scan_varint(&ptr, end); + if (val < 0) { + return false; + } + info->end_column = val - 1; break; case PY_CODE_LOCATION_INFO_NO_COLUMNS: - computed_line += scan_signed_varint(&ptr); + val = scan_signed_varint(&ptr, end); + if (val == INT_MIN) { + return false; + } + computed_line += val; info->lineno = info->end_lineno = computed_line; info->column = info->end_column = -1; break; @@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L case PY_CODE_LOCATION_INFO_ONE_LINE2: computed_line += code - 10; info->lineno = info->end_lineno = computed_line; - info->column = *(ptr++); - info->end_column = *(ptr++); + if (read_byte(&ptr, end, &byte) < 0) { + return false; + } + info->column = byte; + if (read_byte(&ptr, end, &byte) < 0) { + return false; + } + info->end_column = byte; break; default: { - uint8_t second_byte = *(ptr++); - if ((second_byte & 128) != 0) { + if (read_byte(&ptr, end, &byte) < 0) { + return false; + } + if ((byte & 128) != 0) { return false; } info->lineno = info->end_lineno = computed_line; - info->column = code << 3 | (second_byte >> 4); - info->end_column = info->column + (second_byte & 15); + info->column = code << 3 | (byte >> 4); + info->end_column = info->column + (byte & 15); break; } } @@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder, tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); } - if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) { - assert(ctx->tlbc_index >= 0); + // Validate tlbc_index and check TLBC cache + if (tlbc_entry) { + // Validate index bounds (also catches negative values since tlbc_index is signed) + if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid tlbc_index (corrupted remote memory)"); + goto error; + } assert(tlbc_entry->tlbc_array_size > 0); // Use cached TLBC data uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); @@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder, } } - // Fall back to main bytecode + // Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0) addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; done_tlbc: @@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder, ; // Empty statement to avoid C23 extension warning LocationInfo info = {0}; bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + PyBytes_GET_SIZE(meta->linetable), meta->first_lineno, &info); if (!ok) { info.lineno = -1; diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index d9fece6459875a..02c48205b85a37 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -45,6 +45,15 @@ process_single_stack_chunk( // Check actual size and reread if necessary size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); if (actual_size != current_size) { + // Validate size: reject garbage (too small or unreasonably large) + // Size must be at least enough for the header and reasonably bounded + if (actual_size <= offsetof(_PyStackChunk, data) || actual_size > MAX_STACK_CHUNK_SIZE) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid stack chunk size (corrupted remote memory)"); + return -1; + } + this_chunk = PyMem_RawRealloc(this_chunk, actual_size); if (!this_chunk) { PyErr_NoMemory(); @@ -129,7 +138,11 @@ void * find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) { for (size_t i = 0; i < chunks->count; ++i) { - assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data)); + // Validate size: reject garbage that would cause underflow + if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) { + // Skip this chunk - corrupted size from remote memory + continue; + } uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index e14a23357fb400..737787a331f948 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -584,6 +584,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self } while (current_tstate != 0) { + uintptr_t prev_tstate = current_tstate; PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate, gil_holder_tstate, gc_frame); @@ -591,6 +592,14 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self // Check if this was an intentional skip due to mode-based filtering if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL || self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) { + // Detect cycle: if current_tstate didn't advance, we have corrupted data + if (current_tstate == prev_tstate) { + Py_DECREF(interpreter_threads); + set_exception_cause(self, PyExc_RuntimeError, + "Thread list cycle detected (corrupted remote memory)"); + Py_CLEAR(result); + goto exit; + } // Thread was skipped due to mode filtering, continue to next thread continue; } diff --git a/Modules/_remote_debugging/object_reading.c b/Modules/_remote_debugging/object_reading.c index 2f465ca0cac41d..447b7fd5926064 100644 --- a/Modules/_remote_debugging/object_reading.c +++ b/Modules/_remote_debugging/object_reading.c @@ -194,9 +194,21 @@ read_py_long( return 0; } - // If the long object has inline digits, use them directly + // Validate size: reject garbage (negative or unreasonably large) + if (size < 0 || size > MAX_LONG_DIGITS) { + set_exception_cause(unwinder, PyExc_RuntimeError, + "Invalid PyLong size (corrupted remote memory)"); + return -1; + } + + // Calculate how many digits fit inline in our local buffer + Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit; + Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset; + Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit); + + // If the long object has inline digits that fit in our buffer, use them directly digit *digits; - if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + if (size <= max_inline_digits && size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { // For small integers, digits are inline in the long_value.ob_digit array digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); if (!digits) { @@ -204,7 +216,7 @@ read_py_long( set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); return -1; } - memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + memcpy(digits, long_obj + ob_digit_offset, size * sizeof(digit)); } else { // For larger integers, we need to read the digits separately digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));