Skip to content

Commit a679366

Browse files
authored
GH-126910: Make _Py_get_machine_stack_pointer return the actual stack pointer (GH-149103)
* Make _Py_ReachedRecursionLimit inline again
* Remove _Py_MakeRecCheck, replacing its use with _Py_ReachedRecursionLimit
* Move the check for C stack switching into _Py_CheckRecursiveCall
1 parent d8a97cb commit a679366

6 files changed

Lines changed: 33 additions & 51 deletions

File tree

Include/internal/pycore_ceval.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -208,16 +208,16 @@ extern void _PyEval_DeactivateOpCache(void);
208208

209209
/* --- _Py_EnterRecursiveCall() ----------------------------------------- */
210210

211-
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
211+
static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) {
212212
uintptr_t here_addr = _Py_get_machine_stack_pointer();
213213
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
214-
// Overflow if stack pointer is between soft limit and the base of the hardware stack.
215-
// If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
216-
// We could have the wrong stack limits because of limited platform support, or user-space threads.
214+
// Possible overflow if stack pointer is beyond the soft limit.
215+
// _Py_CheckRecursiveCall will check for corner cases and
216+
// report an error if there is an overflow.
217217
#if _Py_STACK_GROWS_DOWN
218-
return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
218+
return here_addr < _tstate->c_stack_soft_limit;
219219
#else
220-
return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
220+
return here_addr > _tstate->c_stack_soft_limit;
221221
#endif
222222
}
223223

@@ -232,7 +232,7 @@ PyAPI_FUNC(int) _Py_CheckRecursiveCallPy(
232232

233233
static inline int _Py_EnterRecursiveCallTstate(PyThreadState *tstate,
234234
const char *where) {
235-
return (_Py_MakeRecCheck(tstate) && _Py_CheckRecursiveCall(tstate, where));
235+
return (_Py_ReachedRecursionLimit(tstate) && _Py_CheckRecursiveCall(tstate, where));
236236
}
237237

238238
static inline int _Py_EnterRecursiveCall(const char *where) {
@@ -246,8 +246,6 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {
246246

247247
PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);
248248

249-
PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate);
250-
251249
// Export for test_peg_generator
252250
PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(
253251
PyThreadState *tstate,

Include/internal/pycore_pystate.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,15 +316,20 @@ static uintptr_t return_pointer_as_int(char* p) {
316316

317317
static inline uintptr_t
318318
_Py_get_machine_stack_pointer(void) {
319-
#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
320-
return (uintptr_t)__builtin_frame_address(0);
321-
#elif defined(_MSC_VER)
322-
return (uintptr_t)_AddressOfReturnAddress();
319+
uintptr_t result;
320+
#if defined(_M_ARM64)
321+
result = __getReg(31);
322+
#elif defined(_M_X64) || defined(_M_IX86)
323+
result = (uintptr_t)_AddressOfReturnAddress();
324+
#elif defined(__aarch64__)
325+
__asm__ ("mov %0, sp" : "=r" (result));
326+
#elif defined(__x86_64__)
327+
__asm__("{movq %%rsp, %0|mov %0, rsp}" : "=r" (result));
323328
#else
324329
char here;
325-
/* Avoid compiler warning about returning stack address */
326-
return return_pointer_as_int(&here);
330+
result = (uintptr_t)&here;
327331
#endif
332+
return result;
328333
}
329334

330335
static inline intptr_t

Include/internal/pycore_pythonrun.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ extern PyObject * _Py_CompileStringObjectWithModule(
4646
* stack consumption of PyEval_EvalDefault */
4747
#if (defined(Py_DEBUG) \
4848
|| defined(_Py_ADDRESS_SANITIZER) \
49-
|| defined(_Py_THREAD_SANITIZER))
49+
|| defined(_Py_THREAD_SANITIZER)) \
50+
|| defined(_Py_UNDEFINED_BEHAVIOR_SANITIZER)
5051
# define _PyOS_LOG2_STACK_MARGIN 12
5152
#else
5253
# define _PyOS_LOG2_STACK_MARGIN 11

Lib/test/test_pyexpat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ def test_trigger_leak(self):
861861
def test_deeply_nested_content_model(self):
862862
# This should raise a RecursionError and not crash.
863863
# See https://github.com/python/cpython/issues/145986.
864-
N = 500_000
864+
N = 800_000
865865
data = (
866866
b'<!DOCTYPE root [\n<!ELEMENT root '
867867
+ b'(a, ' * N + b'a' + b')' * N

Python/ceval.c

Lines changed: 11 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,6 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
4949
#endif
5050
}
5151

52-
void
53-
_Py_EnterRecursiveCallUnchecked(PyThreadState *tstate)
54-
{
55-
uintptr_t here_addr = _Py_get_machine_stack_pointer();
56-
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
57-
#if _Py_STACK_GROWS_DOWN
58-
if (here_addr < _tstate->c_stack_hard_limit) {
59-
#else
60-
if (here_addr > _tstate->c_stack_hard_limit) {
61-
#endif
62-
Py_FatalError("Unchecked stack overflow.");
63-
}
64-
}
65-
6652
#if defined(__s390x__)
6753
# define Py_C_STACK_SIZE 320000
6854
#elif defined(_WIN32)
@@ -278,7 +264,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)
278264

279265

280266
/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
281-
if the stack pointer is between the stack base and c_stack_hard_limit. */
267+
if the stack pointer is beyond c_stack_soft_limit. */
282268
int
283269
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
284270
{
@@ -287,16 +273,21 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
287273
assert(_tstate->c_stack_soft_limit != 0);
288274
assert(_tstate->c_stack_hard_limit != 0);
289275
#if _Py_STACK_GROWS_DOWN
290-
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
291276
if (here_addr < _tstate->c_stack_hard_limit) {
292-
/* Overflowing while handling an overflow. Give up. */
277+
if (here_addr < _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES) {
278+
// Far out of bounds -- Assume stack switching has occurred
279+
return 0;
280+
}
293281
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
294282
#else
295-
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
296283
if (here_addr > _tstate->c_stack_hard_limit) {
297-
/* Overflowing while handling an overflow. Give up. */
284+
if (here_addr > _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES) {
285+
// Far out of bounds -- Assume stack switching has occurred
286+
return 0;
287+
}
298288
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;
299289
#endif
290+
/* Too much stack used to safely raise an exception. Give up. */
300291
char buffer[80];
301292
snprintf(buffer, 80, "Unrecoverable stack overflow (used %d kB)%s", kbytes_used, where);
302293
Py_FatalError(buffer);
@@ -1146,19 +1137,6 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from
11461137
return PyStackRef_FromPyObjectSteal(iter_o);
11471138
}
11481139

1149-
Py_NO_INLINE int
1150-
_Py_ReachedRecursionLimit(PyThreadState *tstate) {
1151-
uintptr_t here_addr = _Py_get_machine_stack_pointer();
1152-
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
1153-
assert(_tstate->c_stack_hard_limit != 0);
1154-
#if _Py_STACK_GROWS_DOWN
1155-
return here_addr <= _tstate->c_stack_soft_limit;
1156-
#else
1157-
return here_addr >= _tstate->c_stack_soft_limit;
1158-
#endif
1159-
}
1160-
1161-
11621140
#if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
11631141
/*
11641142
* gh-129987: The SLP autovectorizer can cause poor code generation for
@@ -1169,7 +1147,7 @@ _Py_ReachedRecursionLimit(PyThreadState *tstate) {
11691147
* (prior to GCC 9, 40% performance drop), so we have to selectively disable
11701148
* it.
11711149
*/
1172-
#define DONT_SLP_VECTORIZE __attribute__((optimize ("no-tree-slp-vectorize")))
1150+
#define DONT_SLP_VECTORIZE __attribute__((optimize ("no-tree-slp-vectorize", "no-omit-frame-pointer")))
11731151
#else
11741152
#define DONT_SLP_VECTORIZE
11751153
#endif

Python/jit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
#include "pycore_jit.h"
3535

36-
// Memory management stuff: ////////////////////////////////////////////////////
36+
// Memory management stuff: ///////////////////////////////////////////////////
3737

3838
#ifndef MS_WINDOWS
3939
#include <sys/mman.h>

0 commit comments

Comments
 (0)