@@ -632,6 +632,8 @@ init_interpreter(PyInterpreterState *interp,
     assert(next != NULL || (interp == runtime->interpreters.main));
     interp->next = next;
 
+    interp->threads_preallocated = &interp->_initial_thread;
+
     // We would call _PyObject_InitState() at this point
     // if interp->feature_flags were already set.
 
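This hunk turns the interpreter's statically embedded `_initial_thread` into a single-slot cache: `threads_preallocated` starts out pointing at it, so the first thread-state allocation can claim it without touching the heap. A minimal standalone sketch of the same pattern, using hypothetical names and plain C11 atomics in place of CPython's `_Py_atomic_*` wrappers:

    #include <stdatomic.h>
    #include <string.h>

    typedef struct ThreadState {
        unsigned long long id;          // stand-ins for a few _PyThreadStateImpl fields
        struct ThreadState *next;
    } ThreadState;

    typedef struct {
        ThreadState embedded;               // stand-in for interp->_initial_thread
        ThreadState *_Atomic preallocated;  // stand-in for interp->threads_preallocated
    } Interp;

    static void
    interp_init(Interp *interp)
    {
        memset(&interp->embedded, 0, sizeof(interp->embedded));
        // Publish the embedded slot so the first allocation can claim it.
        atomic_store(&interp->preallocated, &interp->embedded);
    }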
@@ -767,7 +769,6 @@ PyInterpreterState_New(void)
     return interp;
 }
 
-
static void
interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
{
@@ -906,6 +907,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     // XXX Once we have one allocator per interpreter (i.e.
     // per-interpreter GC) we must ensure that all of the interpreter's
     // objects have been cleaned up at that point.
+
+    // If we had a freelist of thread states, we would clear it here.
 }
 
 
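The new comment is a placeholder: no freelist of thread states exists in this change, but if one were ever added, interpreter teardown would have to drain it here. A hedged sketch of what that drain could look like, continuing the standalone sketch above (the freelist itself is hypothetical, not part of CPython):

    #include <stdlib.h>

    // Hypothetical: drain a freelist of heap-allocated thread states at
    // interpreter teardown. No such list exists in this change.
    static void
    clear_threadstate_freelist(ThreadState **freelist)
    {
        ThreadState *node = *freelist;
        while (node != NULL) {
            ThreadState *next = node->next;  // reuse the sketch's link field
            free(node);
            node = next;
        }
        *freelist = NULL;
    }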
@@ -1427,22 +1430,45 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
     return res;
 }
 
+static void
+reset_threadstate(_PyThreadStateImpl *tstate)
+{
+    // Set to _PyThreadState_INIT directly?
+    memcpy(tstate,
+           &initial._main_interpreter._initial_thread,
+           sizeof(*tstate));
+}
+
static _PyThreadStateImpl *
-alloc_threadstate(void)
+alloc_threadstate(PyInterpreterState *interp)
{
-    return PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+    _PyThreadStateImpl *tstate;
+
+    // Try the preallocated tstate first.
+    tstate = _Py_atomic_exchange_ptr(&interp->threads_preallocated, NULL);
+
+    // Fall back to the allocator.
+    if (tstate == NULL) {
+        tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+        if (tstate == NULL) {
+            return NULL;
+        }
+        reset_threadstate(tstate);
+    }
+    return tstate;
}
 
static void
free_threadstate(_PyThreadStateImpl *tstate)
{
+    PyInterpreterState *interp = tstate->base.interp;
     // The initial thread state of the interpreter is allocated
     // as part of the interpreter state so should not be freed.
-    if (tstate == &tstate->base.interp->_initial_thread) {
-        // Restore to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
+    if (tstate == &interp->_initial_thread) {
+        // Make it available again.
+        reset_threadstate(tstate);
+        assert(interp->threads_preallocated == NULL);
+        _Py_atomic_store_ptr(&interp->threads_preallocated, tstate);
     }
     else {
         PyMem_RawFree(tstate);
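Taken together, `alloc_threadstate()` and `free_threadstate()` now form a single-slot cache. The atomic exchange is what makes claiming the slot race-free: if two threads allocate at once, only one exchange returns the pointer, so they cannot both see the slot as non-NULL and double-use it; the loser falls through to `PyMem_RawCalloc()`. Continuing the standalone sketch above (hypothetical names, C11 atomics):

    #include <stdlib.h>

    static ThreadState *
    slot_alloc(Interp *interp)
    {
        // Atomically claim the preallocated slot; at most one caller wins.
        ThreadState *ts = atomic_exchange(&interp->preallocated, NULL);
        if (ts == NULL) {
            ts = calloc(1, sizeof(*ts));   // fall back to the heap
        }
        return ts;
    }

    static void
    slot_free(Interp *interp, ThreadState *ts)
    {
        if (ts == &interp->embedded) {
            memset(ts, 0, sizeof(*ts));    // restore the pristine template
            // Publish the slot back; a plain store suffices here, since only
            // the embedded state's owner can reach this branch.
            atomic_store(&interp->preallocated, ts);
        }
        else {
            free(ts);
        }
    }

In the real change the reset is a memcpy of the `_PyThreadState_INIT` template rather than a zeroing memset, so fields with non-zero defaults survive the round trip.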
@@ -1533,68 +1559,42 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
static PyThreadState *
new_threadstate(PyInterpreterState *interp, int whence)
{
-    _PyThreadStateImpl *tstate;
-    _PyRuntimeState *runtime = interp->runtime;
-    // We don't need to allocate a thread state for the main interpreter
-    // (the common case), but doing it later for the other case revealed a
-    // reentrancy problem (deadlock). So for now we always allocate before
-    // taking the interpreters lock. See GH-96071.
-    _PyThreadStateImpl *new_tstate = alloc_threadstate();
-    int used_newtstate;
-    if (new_tstate == NULL) {
+    // Allocate the thread state.
+    _PyThreadStateImpl *tstate = alloc_threadstate(interp);
+    if (tstate == NULL) {
         return NULL;
     }
+
 #ifdef Py_GIL_DISABLED
     Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
     if (qsbr_idx < 0) {
-        PyMem_RawFree(new_tstate);
+        free_threadstate(tstate);
         return NULL;
     }
 #endif
 
     /* We serialize concurrent creation to protect global state. */
-    HEAD_LOCK(runtime);
+    HEAD_LOCK(interp->runtime);
 
+    // Initialize the new thread state.
     interp->threads.next_unique_id += 1;
     uint64_t id = interp->threads.next_unique_id;
+    init_threadstate(tstate, interp, id, whence);
 
-    // Allocate the thread state and add it to the interpreter.
+    // Add the new thread state to the interpreter.
     PyThreadState *old_head = interp->threads.head;
-    if (old_head == NULL) {
-        // It's the interpreter's initial thread state.
-        used_newtstate = 0;
-        tstate = &interp->_initial_thread;
-    }
-    // XXX Re-use interp->_initial_thread if not in use?
-    else {
-        // Every valid interpreter must have at least one thread.
-        assert(id > 1);
-        assert(old_head->prev == NULL);
-        used_newtstate = 1;
-        tstate = new_tstate;
-        // Set to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
-    }
-
-    init_threadstate(tstate, interp, id, whence);
     add_threadstate(interp, (PyThreadState *)tstate, old_head);
 
-    HEAD_UNLOCK(runtime);
-    if (!used_newtstate) {
-        // Must be called with lock unlocked to avoid re-entrancy deadlock.
-        PyMem_RawFree(new_tstate);
-    }
-    else {
+    HEAD_UNLOCK(interp->runtime);
 #ifdef Py_GIL_DISABLED
+    if (id == 1) {
         if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) {
             // Immortalize objects marked as using deferred reference counting
             // the first time a non-main thread is created.
             _PyGC_ImmortalizeDeferredObjects(interp);
         }
-#endif
     }
+#endif
 
 #ifdef Py_GIL_DISABLED
     // Must be called with lock unlocked to avoid lock ordering deadlocks.
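The rewrite keeps the discipline the deleted comment described for GH-96071: anything that may re-enter the allocator runs before `HEAD_LOCK()`, and the critical section covers only the id assignment, `init_threadstate()`, and the list insertion. Because `alloc_threadstate()` now always hands back a template-initialized state, the `used_newtstate` bookkeeping and the deferred `PyMem_RawFree()` after unlock disappear. The shape of that pattern, continuing the sketch (the lock and the head/counter parameters are hypothetical stand-ins for CPython's `HEAD_LOCK` and `interp->threads`):

    #include <pthread.h>

    // Allocate outside the lock (allocation may re-enter the runtime),
    // then publish inside a short critical section.
    static ThreadState *
    make_threadstate(Interp *interp, pthread_mutex_t *head_lock,
                     ThreadState **head, unsigned long long *next_unique_id)
    {
        ThreadState *ts = slot_alloc(interp);   // lock NOT held here
        if (ts == NULL) {
            return NULL;
        }
        pthread_mutex_lock(head_lock);
        ts->id = ++*next_unique_id;             // mirrors threads.next_unique_id
        ts->next = *head;                       // mirrors add_threadstate()
        *head = ts;
        pthread_mutex_unlock(head_lock);
        return ts;
    }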