Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,13 @@ extern void _PyEval_DeactivateOpCache(void);
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
uintptr_t here_addr = _Py_get_machine_stack_pointer();
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
// Overflow if stack pointer is between soft limit and the base of the hardware stack.
// If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
// We could have the wrong stack limits because of limited platform support, or user-space threads.
#if _Py_STACK_GROWS_DOWN
return here_addr < _tstate->c_stack_soft_limit;
return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else
return here_addr > _tstate->c_stack_soft_limit;
return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif
}

Expand Down
9 changes: 8 additions & 1 deletion InternalDocs/stack_protection.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,19 @@ Recursion checks are performed by `_Py_EnterRecursiveCall()` or `_Py_EnterRecurs

```python
kb_used = (stack_top - stack_pointer)>>10
if stack_pointer < hard_limit:
if stack_pointer < bottom_of_machine_stack:
pass # Our stack limits could be wrong so it is safest to do nothing.
elif stack_pointer < hard_limit:
FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)")
elif stack_pointer < soft_limit:
raise RecursionError(f"Stack overflow (used {kb_used} kB)")
```

### User space threads and other oddities

Some libraries provide user-space threads. These will change the C stack at runtime.
To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit.

### Diagnosing and fixing stack overflows

For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Only raise a ``RecursionError`` or trigger a fatal error if the stack
pointer is both below the limit pointer *and* above the stack base. If
outside of these bounds assume that it is OK. This prevents false positives
when user-space threads swap stacks.
28 changes: 21 additions & 7 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,11 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
_Py_InitializeRecursionLimits(tstate);
}
#if _Py_STACK_GROWS_DOWN
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES;
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES;
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif
}

Expand Down Expand Up @@ -455,7 +457,7 @@ int pthread_attr_destroy(pthread_attr_t *a)
#endif

static void
hardware_stack_limits(uintptr_t *base, uintptr_t *top)
hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp)
{
#ifdef WIN32
ULONG_PTR low, high;
Expand Down Expand Up @@ -491,10 +493,19 @@ hardware_stack_limits(uintptr_t *base, uintptr_t *top)
return;
}
# endif
uintptr_t here_addr = _Py_get_machine_stack_pointer();
uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096);
// Add some space for caller function then round to minimum page size
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comment.
Both “space for caller function” and “minimum page size” are guesses; what happens if they're wrong?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing bad. We only use the upper limit (for stack growing down) for reporting stack use in case of an overflow.
If we are wrong the report value will be off by a few kb, but it is already an estimate. We are assuming that there isn't much stack above the call to create the thread state.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to avoid calculation, and use here_addr as is? With the Py_C_STACK_SIZE guesses, 4k indeed doesn't matter.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a perfect guess, but I don't see any reason to make it worse.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I see is unnecessary complexity: adding 60 bytes of faux precision to a guess that's in the order of megabytes is rather confusing. These look like more numbers to tweak if you get spurious overflow check failures

If this needs to go in, could you add a comment that this guess can be wildly wrong?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

// This is a guess at the top of the stack, but should be a reasonably
// good guess if called from _PyThreadState_Attach when creating a thread.
// If the thread is attached deep in a call stack, then the guess will be poor.
#if _Py_STACK_GROWS_DOWN
uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*top = top_addr;
*base = top_addr - Py_C_STACK_SIZE;
# else
uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*base = base_addr;
*top = base_addr + Py_C_STACK_SIZE;
#endif
#endif
}

Expand Down Expand Up @@ -543,7 +554,8 @@ void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
{
uintptr_t base, top;
hardware_stack_limits(&base, &top);
uintptr_t here_addr = _Py_get_machine_stack_pointer();
hardware_stack_limits(&base, &top, here_addr);
assert(top != 0);

tstate_set_stack(tstate, base, top);
Expand Down Expand Up @@ -587,7 +599,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)


/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the recursion_depth reaches recursion_limit. */
if the stack pointer is between the stack base and c_stack_hard_limit. */
int
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
{
Expand All @@ -596,10 +608,12 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
assert(_tstate->c_stack_soft_limit != 0);
assert(_tstate->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
if (here_addr < _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
#else
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
if (here_addr > _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;
Expand Down
Loading