From 750609c0a729c44a3ca8b0f5e543b79ac0780a8b Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 25 Jul 2025 22:31:44 +0200 Subject: [PATCH 01/12] Use System allocator for thread-local storage --- .../src/sys/thread_local/destructors/list.rs | 15 ++--- library/std/src/sys/thread_local/key/xous.rs | 6 +- library/std/src/sys/thread_local/os.rs | 26 ++++---- library/std/src/thread/mod.rs | 14 ++-- .../threads-sendsync/tls-in-global-alloc.rs | 64 +++++++++++++++++++ 5 files changed, 98 insertions(+), 27 deletions(-) create mode 100644 tests/ui/threads-sendsync/tls-in-global-alloc.rs diff --git a/library/std/src/sys/thread_local/destructors/list.rs b/library/std/src/sys/thread_local/destructors/list.rs index b9d5214c438d2..606694cc78d79 100644 --- a/library/std/src/sys/thread_local/destructors/list.rs +++ b/library/std/src/sys/thread_local/destructors/list.rs @@ -1,19 +1,14 @@ +use crate::alloc::System; use crate::cell::RefCell; use crate::sys::thread_local::guard; #[thread_local] -static DTORS: RefCell> = RefCell::new(Vec::new()); +static DTORS: RefCell> = + RefCell::new(Vec::new_in(System)); pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - let Ok(mut dtors) = DTORS.try_borrow_mut() else { - // This point can only be reached if the global allocator calls this - // function again. - // FIXME: maybe use the system allocator instead? - rtabort!("the global allocator may not use TLS with destructors"); - }; - + let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") }; guard::enable(); - dtors.push((t, dtor)); } @@ -36,7 +31,7 @@ pub unsafe fn run() { } None => { // Free the list memory. 
- *dtors = Vec::new(); + *dtors = Vec::new_in(System); break; } } diff --git a/library/std/src/sys/thread_local/key/xous.rs b/library/std/src/sys/thread_local/key/xous.rs index a27cec5ca1a60..529178f34757b 100644 --- a/library/std/src/sys/thread_local/key/xous.rs +++ b/library/std/src/sys/thread_local/key/xous.rs @@ -38,6 +38,7 @@ use core::arch::asm; +use crate::alloc::System; use crate::mem::ManuallyDrop; use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory}; use crate::ptr; @@ -151,7 +152,10 @@ struct Node { } unsafe fn register_dtor(key: Key, dtor: Dtor) { - let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() })); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let mut node = + ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System)); #[allow(unused_unsafe)] let mut head = unsafe { DTORS.load(Acquire) }; diff --git a/library/std/src/sys/thread_local/os.rs b/library/std/src/sys/thread_local/os.rs index 9f7a29236e926..07b93a2cbbc34 100644 --- a/library/std/src/sys/thread_local/os.rs +++ b/library/std/src/sys/thread_local/os.rs @@ -1,6 +1,6 @@ use super::key::{Key, LazyKey, get, set}; use super::{abort_on_dtor_unwind, guard}; -use crate::alloc::{self, Layout}; +use crate::alloc::{self, GlobalAlloc, Layout, System}; use crate::cell::Cell; use crate::marker::PhantomData; use crate::mem::ManuallyDrop; @@ -113,17 +113,19 @@ pub const fn value_align() -> usize { crate::mem::align_of::>() } -/// Equivalent to `Box>`, but potentially over-aligned. -struct AlignedBox { +/// Equivalent to `Box, System>`, but potentially over-aligned. 
+struct AlignedSystemBox { ptr: NonNull>, } -impl AlignedBox { +impl AlignedSystemBox { #[inline] fn new(v: Value) -> Self { let layout = Layout::new::>().align_to(ALIGN).unwrap(); - let ptr: *mut Value = (unsafe { alloc::alloc(layout) }).cast(); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let ptr: *mut Value = (unsafe { System.alloc(layout) }).cast(); let Some(ptr) = NonNull::new(ptr) else { alloc::handle_alloc_error(layout); }; @@ -143,7 +145,7 @@ impl AlignedBox { } } -impl Deref for AlignedBox { +impl Deref for AlignedSystemBox { type Target = Value; #[inline] @@ -152,14 +154,14 @@ impl Deref for AlignedBox { } } -impl Drop for AlignedBox { +impl Drop for AlignedSystemBox { #[inline] fn drop(&mut self) { let layout = Layout::new::>().align_to(ALIGN).unwrap(); unsafe { let unwind_result = catch_unwind(AssertUnwindSafe(|| self.ptr.drop_in_place())); - alloc::dealloc(self.ptr.as_ptr().cast(), layout); + System.dealloc(self.ptr.as_ptr().cast(), layout); if let Err(payload) = unwind_result { resume_unwind(payload); } @@ -205,11 +207,11 @@ impl Storage { return ptr::null(); } - let value = AlignedBox::::new(Value { + let value = AlignedSystemBox::::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key, }); - let ptr = AlignedBox::into_raw(value); + let ptr = AlignedSystemBox::into_raw(value); // SAFETY: // * key came from a `LazyKey` and is thus correct. @@ -227,7 +229,7 @@ impl Storage { // initializer has already returned and the next scope only starts // after we return the pointer. Therefore, there can be no references // to the old value. - drop(unsafe { AlignedBox::::from_raw(old) }); + drop(unsafe { AlignedSystemBox::::from_raw(old) }); } // SAFETY: We just created this value above. 
@@ -246,7 +248,7 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) // Note that to prevent an infinite loop we reset it back to null right // before we return from the destructor ourselves. abort_on_dtor_unwind(|| { - let ptr = unsafe { AlignedBox::::from_raw(ptr as *mut Value) }; + let ptr = unsafe { AlignedSystemBox::::from_raw(ptr as *mut Value) }; let key = ptr.key; // SAFETY: `key` is the TLS key `ptr` was stored under. unsafe { set(key, ptr::without_provenance_mut(1)) }; diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 16313da8e178c..88da03b1d3ca5 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -158,6 +158,7 @@ #[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))] mod tests; +use crate::alloc::System; use crate::any::Any; use crate::cell::UnsafeCell; use crate::ffi::CStr; @@ -1465,7 +1466,10 @@ impl Inner { /// /// [`thread::current`]: current::current pub struct Thread { - inner: Pin>, + // We use the System allocator such that creating or dropping this handle + // does not interfere with a potential Global allocator using thread-local + // storage. + inner: Pin>, } impl Thread { @@ -1478,7 +1482,7 @@ impl Thread { // SAFETY: We pin the Arc immediately after creation, so its address never // changes. let inner = unsafe { - let mut arc = Arc::::new_uninit(); + let mut arc = Arc::::new_uninit_in(System); let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr(); (&raw mut (*ptr).name).write(name); (&raw mut (*ptr).id).write(id); @@ -1649,7 +1653,7 @@ impl Thread { pub fn into_raw(self) -> *const () { // Safety: We only expose an opaque pointer, which maintains the `Pin` invariant. let inner = unsafe { Pin::into_inner_unchecked(self.inner) }; - Arc::into_raw(inner) as *const () + Arc::into_raw_with_allocator(inner).0 as *const () } /// Constructs a `Thread` from a raw pointer. 
@@ -1671,7 +1675,9 @@ impl Thread { #[unstable(feature = "thread_raw", issue = "97523")] pub unsafe fn from_raw(ptr: *const ()) -> Thread { // Safety: Upheld by caller. - unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } } + unsafe { + Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) } + } } fn cname(&self) -> Option<&CStr> { diff --git a/tests/ui/threads-sendsync/tls-in-global-alloc.rs b/tests/ui/threads-sendsync/tls-in-global-alloc.rs new file mode 100644 index 0000000000000..82a96ce9a6c3a --- /dev/null +++ b/tests/ui/threads-sendsync/tls-in-global-alloc.rs @@ -0,0 +1,64 @@ +//@ run-pass +//@ needs-threads + +use std::alloc::{GlobalAlloc, Layout, System}; +use std::thread::Thread; +use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering}; + +static GLOBAL: AtomicUsize = AtomicUsize::new(0); + +struct Local(Thread); + +thread_local! { + static LOCAL: Local = { + GLOBAL.fetch_or(1, Ordering::Relaxed); + Local(std::thread::current()) + }; +} + +impl Drop for Local { + fn drop(&mut self) { + GLOBAL.fetch_or(2, Ordering::Relaxed); + } +} + +static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); + +#[global_allocator] +static ALLOC: Alloc = Alloc; +struct Alloc; + +unsafe impl GlobalAlloc for Alloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // Make sure we aren't re-entrant. + assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); + LOCAL.with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + let ret = unsafe { System.alloc(layout) }; + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + ret + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + // Make sure we aren't re-entrant. 
+ assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); + LOCAL.with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + unsafe { System.dealloc(ptr, layout) } + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + } +} + +fn main() { + std::thread::spawn(|| { + std::hint::black_box(vec![1, 2]); + assert!(GLOBAL.load(Ordering::Relaxed) == 1); + }) + .join() + .unwrap(); + assert!(GLOBAL.load(Ordering::Relaxed) == 3); +} From 8481b827fbdfcf3e58c19763a659725d43ad7819 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Sat, 26 Jul 2025 14:30:00 +0200 Subject: [PATCH 02/12] Use spinlock for ThreadId if 64-bit atomic unavailable --- library/std/src/thread/mod.rs | 48 ++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 88da03b1d3ca5..238682cef3fa2 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1258,21 +1258,45 @@ impl ThreadId { } } _ => { - use crate::sync::{Mutex, PoisonError}; - - static COUNTER: Mutex = Mutex::new(0); + use crate::cell::SyncUnsafeCell; + use crate::hint::spin_loop; + use crate::sync::atomic::{Atomic, AtomicBool}; + use crate::thread::yield_now; + + // If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex + // here as we might be trying to get the current thread id in the global allocator, + // and on some platforms Mutex requires allocation. + static COUNTER_LOCKED: Atomic = AtomicBool::new(false); + static COUNTER: SyncUnsafeCell = SyncUnsafeCell::new(0); + + // Acquire lock. 
+ let mut spin = 0; + while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() { + if spin <= 3 { + for _ in 0..(1 << spin) { + spin_loop(); + } + } else { + yield_now(); + } + spin += 1; + } - let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner); - let Some(id) = counter.checked_add(1) else { - // in case the panic handler ends up calling `ThreadId::new()`, - // avoid reentrant lock acquire. - drop(counter); - exhausted(); + let id; + // SAFETY: we have an exclusive lock on the counter. + unsafe { + id = (*COUNTER.get()).saturating_add(1); + (*COUNTER.get()) = id; }; - *counter = id; - drop(counter); - ThreadId(NonZero::new(id).unwrap()) + // Release the lock. + COUNTER_LOCKED.store(false, Ordering::Release); + + if id == u64::MAX { + exhausted() + } else { + ThreadId(NonZero::new(id).unwrap()) + } } } } From f9b0811d01618679e479482924940320a5c2eef9 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Tue, 29 Jul 2025 19:43:29 +0200 Subject: [PATCH 03/12] Add documentation guaranteeing global allocator use of TLS Remove outdated part of comment claiming thread_local re-enters global allocator Fix typo in doc comment Add comments for guarantees given and footnote that System may still be called Revise mention of using the global allocator Allow for the possibility that the global allocator is the system allocator. 
Co-authored-by: Mark Rousskov --- library/core/src/alloc/global.rs | 25 +++++++++++++++++++++++++ library/std/src/alloc.rs | 9 ++++++++- library/std/src/thread/current.rs | 19 ++++++++++--------- library/std/src/thread/local.rs | 7 ++++++- 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/library/core/src/alloc/global.rs b/library/core/src/alloc/global.rs index e2413b619f9fa..6447fde0cd8a9 100644 --- a/library/core/src/alloc/global.rs +++ b/library/core/src/alloc/global.rs @@ -115,6 +115,31 @@ use crate::{cmp, ptr}; /// Whether allocations happen or not is not part of the program behavior, even if it /// could be detected via an allocator that tracks allocations by printing or otherwise /// having side effects. +/// +/// # Re-entrance +/// +/// When implementing a global allocator one has to be careful not to create an infinitely recursive +/// implementation by accident, as many constructs in the Rust standard library may allocate in +/// their implementation. For example, on some platforms [`std::sync::Mutex`] may allocate, so using +/// it is highly problematic in a global allocator. +/// +/// Generally speaking for this reason one should stick to library features available through +/// [`core`], and avoid using [`std`] in a global allocator. A few features from [`std`] are +/// guaranteed to not use `#[global_allocator]` to allocate: +/// +/// - [`std::thread_local`], +/// - [`std::thread::current`], +/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and +/// [`Clone`] implementation. 
+/// +/// [`std`]: ../../std/index.html +/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html +/// [`std::thread_local`]: ../../std/macro.thread_local.html +/// [`std::thread::current`]: ../../std/thread/fn.current.html +/// [`std::thread::park`]: ../../std/thread/fn.park.html +/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html +/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark + #[stable(feature = "global_alloc", since = "1.28.0")] pub unsafe trait GlobalAlloc { /// Allocates memory as described by the given `layout`. diff --git a/library/std/src/alloc.rs b/library/std/src/alloc.rs index daa25c5a50dd6..63a87fb355a33 100644 --- a/library/std/src/alloc.rs +++ b/library/std/src/alloc.rs @@ -11,7 +11,7 @@ //! //! This attribute allows configuring the choice of global allocator. //! You can use this to implement a completely custom global allocator -//! to route all default allocation requests to a custom object. +//! to route all[^system-alloc] default allocation requests to a custom object. //! //! ```rust //! use std::alloc::{GlobalAlloc, System, Layout}; @@ -52,6 +52,13 @@ //! //! The `#[global_allocator]` can only be used once in a crate //! or its recursive dependencies. +//! +//! [^system-alloc]: Note that the Rust standard library internals may still +//! directly call [`System`] when necessary (for example for the runtime +//! support typically required to implement a global allocator, see [re-entrance] on [`GlobalAlloc`] +//! for more details). +//! +//! [re-entrance]: trait.GlobalAlloc.html#re-entrance #![deny(unsafe_op_in_unsafe_fn)] #![stable(feature = "alloc_module", since = "1.28.0")] diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index f00212bfcb617..c4619a80021a6 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -270,15 +270,16 @@ fn init_current(current: *mut ()) -> Thread { // extra TLS write above shouldn't matter. 
The alternative is nearly always // a stack overflow. - // If you came across this message, contact the author of your allocator. - // If you are said author: A surprising amount of functions inside the - // standard library (e.g. `Mutex`, `thread_local!`, `File` when using long - // paths, even `panic!` when using unwinding), need memory allocation, so - // you'll get circular dependencies all over the place when using them. - // I (joboet) highly recommend using only APIs from core in your allocator - // and implementing your own system abstractions. Still, if you feel that - // a particular API should be entirely allocation-free, feel free to open - // an issue on the Rust repository, we'll see what we can do. + // If you came across this message, contact the author of your + // allocator. If you are said author: A surprising amount of functions + // inside the standard library (e.g. `Mutex`, `File` when using long + // paths, even `panic!` when using unwinding), need memory allocation, + // so you'll get circular dependencies all over the place when using + // them. I (joboet) highly recommend using only APIs from core in your + // allocator and implementing your own system abstractions. Still, if + // you feel that a particular API should be entirely allocation-free, + // feel free to open an issue on the Rust repository, we'll see what we + // can do. rtabort!( "\n\ Attempted to access thread-local data while allocating said data.\n\ diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index 4259a4d1f3b7c..06e4b252fc8f3 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -24,12 +24,17 @@ use crate::fmt; /// [`with`]) within a thread, and values that implement [`Drop`] get /// destructed when a thread exits. Some platform-specific caveats apply, which /// are explained below. -/// Note that if the destructor panics, the whole process will be [aborted]. 
+/// Note that, should the destructor panic, the whole process will be [aborted]. +/// On platforms where initialization requires memory allocation, this is +/// performed directly through [`System`], allowing the [global allocator] +/// to make use of thread local storage. /// /// A `LocalKey`'s initializer cannot recursively depend on itself. Using a /// `LocalKey` in this way may cause panics, aborts, or infinite recursion on /// the first call to `with`. /// +/// [`System`]: crate::alloc::System +/// [global allocator]: crate::alloc /// [aborted]: crate::process::abort /// /// # Single-thread Synchronization From 58830def36068871995e8b371d22744e786396a8 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Sun, 21 Sep 2025 16:37:43 +0200 Subject: [PATCH 04/12] Ensure set_current is called early during thread init --- library/std/src/sys/thread/hermit.rs | 34 ++++++------ library/std/src/sys/thread/solid.rs | 24 ++++---- library/std/src/sys/thread/teeos.rs | 31 ++++------- library/std/src/sys/thread/unix.rs | 31 +++++------ library/std/src/sys/thread/unsupported.rs | 7 +-- library/std/src/sys/thread/wasip1.rs | 27 ++++----- library/std/src/sys/thread/windows.rs | 31 +++++------ library/std/src/sys/thread/xous.rs | 21 +++---- library/std/src/thread/mod.rs | 67 +++++++++++++++-------- 9 files changed, 136 insertions(+), 137 deletions(-) diff --git a/library/std/src/sys/thread/hermit.rs b/library/std/src/sys/thread/hermit.rs index 4d9f3b114c2a0..faeaa9ae2dfcc 100644 --- a/library/std/src/sys/thread/hermit.rs +++ b/library/std/src/sys/thread/hermit.rs @@ -1,4 +1,5 @@ use crate::num::NonZero; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{io, ptr}; @@ -16,14 +17,14 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20; impl Thread { pub unsafe fn new_with_coreid( stack: usize, - p: Box, + init: Box, core_id: isize, ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + let data = Box::into_raw(init); let tid = unsafe { hermit_abi::spawn2( 
thread_start, - p.expose_provenance(), + data.expose_provenance(), hermit_abi::Priority::into(hermit_abi::NORMAL_PRIO), stack, core_id, @@ -31,35 +32,34 @@ impl Thread { }; return if tid == 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. unsafe { - drop(Box::from_raw(p)); + drop(Box::from_raw(data)); } Err(io::const_error!(io::ErrorKind::Uncategorized, "unable to create thread!")) } else { Ok(Thread { tid }) }; - extern "C" fn thread_start(main: usize) { - unsafe { - // Finally, let's run some code. - Box::from_raw(ptr::with_exposed_provenance::>(main).cast_mut())(); + extern "C" fn thread_start(data: usize) { + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = + unsafe { Box::from_raw(ptr::with_exposed_provenance_mut::(data)) }; + let rust_start = init.init(); + rust_start(); - // run all destructors + // Run all destructors. 
+ unsafe { crate::sys::thread_local::destructors::run(); - crate::rt::thread_cleanup(); } + crate::rt::thread_cleanup(); } } - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(stack: usize, init: Box) -> io::Result { unsafe { - Thread::new_with_coreid(stack, p, -1 /* = no specific core */) + Thread::new_with_coreid(stack, init, -1 /* = no specific core */) } } diff --git a/library/std/src/sys/thread/solid.rs b/library/std/src/sys/thread/solid.rs index 46a84faa80225..5953c0e7b6129 100644 --- a/library/std/src/sys/thread/solid.rs +++ b/library/std/src/sys/thread/solid.rs @@ -8,6 +8,7 @@ use crate::sync::atomic::{Atomic, AtomicUsize, Ordering}; use crate::sys::pal::itron::error::{ItronError, expect_success, expect_success_aborting}; use crate::sys::pal::itron::time::dur2reltims; use crate::sys::pal::itron::{abi, task}; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{hint, io}; @@ -27,9 +28,9 @@ unsafe impl Sync for Thread {} /// State data shared between a parent thread and child thread. It's dropped on /// a transition to one of the final states. struct ThreadInner { - /// This field is used on thread creation to pass a closure from + /// This field is used on thread creation to pass initialization data from /// `Thread::new` to the created task. - start: UnsafeCell>>, + init: UnsafeCell>>, /// A state machine. Each transition is annotated with `[...]` in the /// source code. @@ -65,7 +66,7 @@ struct ThreadInner { lifecycle: Atomic, } -// Safety: The only `!Sync` field, `ThreadInner::start`, is only touched by +// Safety: The only `!Sync` field, `ThreadInner::init`, is only touched by // the task represented by `ThreadInner`. unsafe impl Sync for ThreadInner {} @@ -84,13 +85,9 @@ impl Thread { /// # Safety /// /// See `thread::Builder::spawn_unchecked` for safety requirements. 
- pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(stack: usize, init: Box) -> io::Result { let inner = Box::new(ThreadInner { - start: UnsafeCell::new(ManuallyDrop::new(p)), + init: UnsafeCell::new(ManuallyDrop::new(init)), lifecycle: AtomicUsize::new(LIFECYCLE_INIT), }); @@ -100,10 +97,11 @@ impl Thread { let inner = unsafe { &*p_inner }; // Safety: Since `trampoline` is called only once for each - // `ThreadInner` and only `trampoline` touches `start`, - // `start` contains contents and is safe to mutably borrow. - let p = unsafe { ManuallyDrop::take(&mut *inner.start.get()) }; - p(); + // `ThreadInner` and only `trampoline` touches `init`, + // `init` contains contents and is safe to mutably borrow. + let init = unsafe { ManuallyDrop::take(&mut *inner.init.get()) }; + let rust_start = init.init(); + rust_start(); // Fix the current thread's state just in case, so that the // destructors won't abort diff --git a/library/std/src/sys/thread/teeos.rs b/library/std/src/sys/thread/teeos.rs index cad100395c9f8..5e71f757eaa4b 100644 --- a/library/std/src/sys/thread/teeos.rs +++ b/library/std/src/sys/thread/teeos.rs @@ -1,5 +1,6 @@ use crate::mem::{self, ManuallyDrop}; use crate::sys::os; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{cmp, io, ptr}; @@ -24,12 +25,8 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = unsafe { mem::zeroed() }; let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); @@ -62,16 +59,16 @@ impl Thread { } }; - let ret = unsafe { libc::pthread_create(&mut native, &attr, 
thread_start, p as *mut _) }; - // Note: if the thread creation fails and this assert fails, then p will + let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) }; + // Note: if the thread creation fails and this assert fails, then data will // be leaked. However, an alternative design could cause double-free // which is clearly worse. assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. - drop(unsafe { Box::from_raw(p) }); + drop(unsafe { Box::from_raw(data) }); Err(io::Error::from_raw_os_error(ret)) } else { // The new thread will start running earliest after the next yield. @@ -80,15 +77,11 @@ impl Thread { Ok(Thread { id: native }) }; - extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { - unsafe { - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. - // this is not necessary in TEE. - //let _handler = stack_overflow::Handler::new(); - // Finally, let's run some code. - Box::from_raw(main as *mut Box)(); - } + extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { + // SAFETY: we are simply recreating the box that was leaked earlier. 
+ let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); ptr::null_mut() } } diff --git a/library/std/src/sys/thread/unix.rs b/library/std/src/sys/thread/unix.rs index 9b26262bc80dc..d4c27097afd79 100644 --- a/library/std/src/sys/thread/unix.rs +++ b/library/std/src/sys/thread/unix.rs @@ -14,6 +14,7 @@ use crate::sys::weak::dlsym; #[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))] use crate::sys::weak::weak; use crate::sys::{os, stack_overflow}; +use crate::thread::{ThreadInit, current}; use crate::time::Duration; use crate::{cmp, io, ptr}; #[cfg(not(any( @@ -30,11 +31,6 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024; #[cfg(any(target_os = "espidf", target_os = "nuttx"))] pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used -struct ThreadData { - name: Option>, - f: Box, -} - pub struct Thread { id: libc::pthread_t, } @@ -47,13 +43,8 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn new( - stack: usize, - name: Option<&str>, - f: Box, - ) -> io::Result { - let data = Box::new(ThreadData { name: name.map(Box::from), f }); - + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = init; let mut attr: mem::MaybeUninit = mem::MaybeUninit::uninit(); assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0); let mut attr = DropGuard::new(&mut attr, |attr| { @@ -116,12 +107,16 @@ impl Thread { extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { unsafe { - let data = Box::from_raw(data as *mut ThreadData); - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. 
- let _handler = stack_overflow::Handler::new(data.name); - // Finally, let's run some code. - (data.f)(); + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = Box::from_raw(data as *mut ThreadInit); + let rust_start = init.init(); + + // Set up our thread name and stack overflow handler which may get triggered + // if we run out of stack. + let thread = current(); + let _handler = stack_overflow::Handler::new(thread.name().map(Box::from)); + + rust_start(); } ptr::null_mut() } diff --git a/library/std/src/sys/thread/unsupported.rs b/library/std/src/sys/thread/unsupported.rs index a5001efa3b405..2d8524f825499 100644 --- a/library/std/src/sys/thread/unsupported.rs +++ b/library/std/src/sys/thread/unsupported.rs @@ -1,6 +1,7 @@ use crate::ffi::CStr; use crate::io; use crate::num::NonZero; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread(!); @@ -9,11 +10,7 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - _stack: usize, - _name: Option<&str>, - _p: Box, - ) -> io::Result { + pub unsafe fn new(_stack: usize, _init: Box) -> io::Result { Err(io::Error::UNSUPPORTED_PLATFORM) } diff --git a/library/std/src/sys/thread/wasip1.rs b/library/std/src/sys/thread/wasip1.rs index 83001fad49c81..6932691b74390 100644 --- a/library/std/src/sys/thread/wasip1.rs +++ b/library/std/src/sys/thread/wasip1.rs @@ -7,6 +7,7 @@ use crate::mem; use crate::num::NonZero; #[cfg(target_feature = "atomics")] use crate::sys::os; +use crate::thread::ThreadInit; use crate::time::Duration; #[cfg(target_feature = "atomics")] use crate::{cmp, ptr}; @@ -73,12 +74,8 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024; #[cfg(target_feature = "atomics")] impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p 
= Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = unsafe { mem::zeroed() }; let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); @@ -100,28 +97,28 @@ impl Thread { } }; - let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) }; - // Note: if the thread creation fails and this assert fails, then p will + let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) }; + // Note: if the thread creation fails and this assert fails, then data will // be leaked. However, an alternative design could cause double-free // which is clearly worse. assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. unsafe { - drop(Box::from_raw(p)); + drop(Box::from_raw(data)); } Err(io::Error::from_raw_os_error(ret)) } else { Ok(Thread { id: native }) }; - extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { - unsafe { - // Finally, let's run some code. - Box::from_raw(main as *mut Box)(); - } + extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { + // SAFETY: we are simply recreating the box that was leaked earlier. 
+ let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); ptr::null_mut() } } diff --git a/library/std/src/sys/thread/windows.rs b/library/std/src/sys/thread/windows.rs index a5640c51c4a5d..1ef496a20cfe4 100644 --- a/library/std/src/sys/thread/windows.rs +++ b/library/std/src/sys/thread/windows.rs @@ -8,6 +8,7 @@ use crate::sys::pal::time::WaitableTimer; use crate::sys::pal::{dur2timeout, to_u16s}; use crate::sys::{c, stack_overflow}; use crate::sys_common::FromInner; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{io, ptr}; @@ -20,23 +21,19 @@ pub struct Thread { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); // CreateThread rounds up values for the stack size to the nearest page size (at least 4kb). // If a value of zero is given then the default stack size is used instead. // SAFETY: `thread_start` has the right ABI for a thread's entry point. - // `p` is simply passed through to the new thread without being touched. + // `data` is simply passed through to the new thread without being touched. let ret = unsafe { let ret = c::CreateThread( ptr::null_mut(), stack, Some(thread_start), - p as *mut _, + data as *mut _, c::STACK_SIZE_PARAM_IS_A_RESERVATION, ptr::null_mut(), ); @@ -45,19 +42,21 @@ impl Thread { return if let Ok(handle) = ret.try_into() { Ok(Thread { handle: Handle::from_inner(handle) }) } else { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. 
- unsafe { drop(Box::from_raw(p)) }; + unsafe { drop(Box::from_raw(data)) }; Err(io::Error::last_os_error()) }; - unsafe extern "system" fn thread_start(main: *mut c_void) -> u32 { - // Next, reserve some stack space for if we otherwise run out of stack. + unsafe extern "system" fn thread_start(data: *mut c_void) -> u32 { + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + + // Reserve some stack space for if we otherwise run out of stack. stack_overflow::reserve_stack(); - // Finally, let's run some code. - // SAFETY: We are simply recreating the box that was leaked earlier. - // It's the responsibility of the one who call `Thread::new` to ensure this is safe to call here. - unsafe { Box::from_raw(main as *mut Box)() }; + + rust_start(); 0 } } diff --git a/library/std/src/sys/thread/xous.rs b/library/std/src/sys/thread/xous.rs index 133e15a0928c6..28f751337a235 100644 --- a/library/std/src/sys/thread/xous.rs +++ b/library/std/src/sys/thread/xous.rs @@ -7,6 +7,7 @@ use crate::os::xous::ffi::{ map_memory, update_memory_flags, }; use crate::os::xous::services::{TicktimerScalar, ticktimer_server}; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread { @@ -19,12 +20,8 @@ pub const GUARD_PAGE_SIZE: usize = 4096; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut stack_size = crate::cmp::max(stack, MIN_STACK_SIZE); if (stack_size & 4095) != 0 { @@ -65,7 +62,7 @@ impl Thread { let tid = create_thread( thread_start as *mut usize, &mut stack_plus_guard_pages[GUARD_PAGE_SIZE..(stack_size + GUARD_PAGE_SIZE)], - p as usize, + data as usize, guard_page_pre, stack_size, 0, @@ 
-73,14 +70,14 @@ impl Thread { .map_err(|code| io::Error::from_raw_os_error(code as i32))?; extern "C" fn thread_start( - main: *mut usize, + data: *mut usize, guard_page_pre: usize, stack_size: usize, ) -> ! { - unsafe { - // Run the contents of the new thread. - Box::from_raw(main as *mut Box)(); - } + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); // Destroy TLS, which will free the TLS page and call the destructor for // any thread local storage (if any). diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 238682cef3fa2..e15d0642135f4 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -212,6 +212,36 @@ pub mod local_impl { pub use crate::sys::thread_local::*; } +/// The data passed to the spawned thread for thread initialization. Any thread +/// implementation should start a new thread by calling .init() on this before +/// doing anything else to ensure the current thread is properly initialized and +/// the global allocator works. +pub(crate) struct ThreadInit { + pub handle: Thread, + pub rust_start: Box, +} + +impl ThreadInit { + /// Initialize the 'current thread' mechanism on this thread, returning the + /// Rust entry point. + pub fn init(self: Box) -> Box { + // Set the current thread before any (de)allocations on the global allocator occur, + // so that it may call std::thread::current() in its implementation. This is also + // why we take Box, to ensure the Box is not destroyed until after this point. + // Cloning the handle does not invoke the global allocator, it is an Arc. + if let Err(_thread) = set_current(self.handle.clone()) { + // The current thread should not have set yet. Use an abort to save binary size (see #123356). 
+ rtabort!("current thread handle already set during thread spawn"); + } + + if let Some(name) = self.handle.cname() { + imp::set_name(name); + } + + self.rust_start + } +} + //////////////////////////////////////////////////////////////////////////////// // Builder //////////////////////////////////////////////////////////////////////////////// @@ -503,16 +533,14 @@ impl Builder { }); let id = ThreadId::new(); - let my_thread = Thread::new(id, name); + let thread = Thread::new(id, name); let hooks = if no_hooks { spawnhook::ChildSpawnHooks::default() } else { - spawnhook::run_spawn_hooks(&my_thread) + spawnhook::run_spawn_hooks(&thread) }; - let their_thread = my_thread.clone(); - let my_packet: Arc> = Arc::new(Packet { scope: scope_data, result: UnsafeCell::new(None), @@ -544,19 +572,10 @@ impl Builder { } let f = MaybeDangling::new(f); - let main = move || { - if let Err(_thread) = set_current(their_thread.clone()) { - // Both the current thread handle and the ID should not be - // initialized yet. Since only the C runtime and some of our - // platform code run before this, this point shouldn't be - // reachable. Use an abort to save binary size (see #123356). - rtabort!("something here is badly broken!"); - } - - if let Some(name) = their_thread.cname() { - imp::set_name(name); - } + // The entrypoint of the Rust thread, after platform-specific thread + // initialization is done. + let rust_start = move || { let f = f.into_inner(); let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { crate::sys::backtrace::__rust_begin_short_backtrace(|| hooks.run()); @@ -579,11 +598,15 @@ impl Builder { scope_data.increment_num_running_threads(); } - let main = Box::new(main); // SAFETY: dynamic size and alignment of the Box remain the same. See below for why the // lifetime change is justified. 
- let main = - unsafe { Box::from_raw(Box::into_raw(main) as *mut (dyn FnOnce() + Send + 'static)) }; + let rust_start = unsafe { + Box::from_raw( + Box::into_raw(Box::new(rust_start)) as *mut (dyn FnOnce() + Send + 'static) + ) + }; + + let init = Box::new(ThreadInit { handle: thread.clone(), rust_start }); Ok(JoinInner { // SAFETY: @@ -599,8 +622,8 @@ impl Builder { // Similarly, the `sys` implementation must guarantee that no references to the closure // exist after the thread has terminated, which is signaled by `Thread::join` // returning. - native: unsafe { imp::Thread::new(stack_size, my_thread.name(), main)? }, - thread: my_thread, + native: unsafe { imp::Thread::new(stack_size, init)? }, + thread, packet: my_packet, }) } @@ -1704,7 +1727,7 @@ impl Thread { } } - fn cname(&self) -> Option<&CStr> { + pub(crate) fn cname(&self) -> Option<&CStr> { if let Some(name) = &self.inner.name { Some(name.as_cstr()) } else if main_thread::get() == Some(self.inner.id) { From 0b7cdf24355d873745fc7d91ce255e227b6b27bf Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Wed, 1 Oct 2025 15:01:20 +0200 Subject: [PATCH 05/12] Address review comments --- .../src/sys/thread_local/destructors/list.rs | 4 +++- library/std/src/thread/current.rs | 22 +++++-------------- library/std/src/thread/mod.rs | 21 ++++++++---------- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/library/std/src/sys/thread_local/destructors/list.rs b/library/std/src/sys/thread_local/destructors/list.rs index 606694cc78d79..44e00c8a5ae59 100644 --- a/library/std/src/sys/thread_local/destructors/list.rs +++ b/library/std/src/sys/thread_local/destructors/list.rs @@ -7,7 +7,9 @@ static DTORS: RefCell> = RefCell::new(Vec::new_in(System)); pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") }; + let Ok(mut dtors) = DTORS.try_borrow_mut() else { + rtabort!("the System allocator may not use TLS with 
destructors") + }; guard::enable(); dtors.push((t, dtor)); } diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index c4619a80021a6..ea0c6c7229fe8 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -269,23 +269,13 @@ fn init_current(current: *mut ()) -> Thread { // BUSY exists solely for this check, but as it is in the slow path, the // extra TLS write above shouldn't matter. The alternative is nearly always // a stack overflow. - - // If you came across this message, contact the author of your - // allocator. If you are said author: A surprising amount of functions - // inside the standard library (e.g. `Mutex`, `File` when using long - // paths, even `panic!` when using unwinding), need memory allocation, - // so you'll get circular dependencies all over the place when using - // them. I (joboet) highly recommend using only APIs from core in your - // allocator and implementing your own system abstractions. Still, if - // you feel that a particular API should be entirely allocation-free, - // feel free to open an issue on the Rust repository, we'll see what we - // can do. + // + // If we reach this point it means our initialization routine ended up + // calling current() either directly, or indirectly through the global + // allocator, which is a bug either way as we may not call the global + // allocator in current(). 
rtabort!( - "\n\ - Attempted to access thread-local data while allocating said data.\n\ - Do not access functions that allocate in the global allocator!\n\ - This is a bug in the global allocator.\n\ - " + "init_current() was re-entrant, which indicates a bug in the Rust threading implementation" ) } else { debug_assert_eq!(current, DESTROYED); diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index e15d0642135f4..523b41525e2d7 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1305,20 +1305,17 @@ impl ThreadId { spin += 1; } - let id; // SAFETY: we have an exclusive lock on the counter. unsafe { - id = (*COUNTER.get()).saturating_add(1); - (*COUNTER.get()) = id; - }; - - // Release the lock. - COUNTER_LOCKED.store(false, Ordering::Release); - - if id == u64::MAX { - exhausted() - } else { - ThreadId(NonZero::new(id).unwrap()) + if *COUNTER.get() == u64::MAX { + COUNTER_LOCKED.store(false, Ordering::Release); + exhausted() + } else { + let id = *COUNTER.get() + 1; + *COUNTER.get() = id; + COUNTER_LOCKED.store(false, Ordering::Release); + ThreadId(NonZero::new(id).unwrap()) + } } } } From 85a49073c325a7339f8b583492177aa87c12795b Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Wed, 1 Oct 2025 15:06:30 +0200 Subject: [PATCH 06/12] Use checked_add instead of manual overflow check --- library/std/src/thread/mod.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 523b41525e2d7..a806261a7c393 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1307,14 +1307,13 @@ impl ThreadId { // SAFETY: we have an exclusive lock on the counter. 
unsafe { - if *COUNTER.get() == u64::MAX { - COUNTER_LOCKED.store(false, Ordering::Release); - exhausted() - } else { - let id = *COUNTER.get() + 1; + if let Some(id) = (*COUNTER.get()).checked_add(1) { *COUNTER.get() = id; COUNTER_LOCKED.store(false, Ordering::Release); ThreadId(NonZero::new(id).unwrap()) + } else { + COUNTER_LOCKED.store(false, Ordering::Release); + exhausted() } } } From b717684f877fa481aef40005aa78448262021457 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 3 Oct 2025 00:52:18 +0200 Subject: [PATCH 07/12] Fix SGX implementation --- library/std/src/sys/pal/sgx/abi/mod.rs | 5 ++++- library/std/src/sys/pal/sgx/abi/tls/mod.rs | 7 ------- library/std/src/sys/thread/sgx.rs | 19 +++++++++---------- library/std/src/thread/mod.rs | 4 ++-- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/library/std/src/sys/pal/sgx/abi/mod.rs b/library/std/src/sys/pal/sgx/abi/mod.rs index b8c4d7740c4e1..1c6c681d4c179 100644 --- a/library/std/src/sys/pal/sgx/abi/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/mod.rs @@ -3,6 +3,7 @@ use core::arch::global_asm; use core::sync::atomic::{Atomic, AtomicUsize, Ordering}; +use crate::alloc::System; use crate::io::Write; // runtime features @@ -63,7 +64,9 @@ unsafe extern "C" fn tcs_init(secondary: bool) { #[unsafe(no_mangle)] extern "C" fn entry(p1: u64, p2: u64, p3: u64, secondary: bool, p4: u64, p5: u64) -> EntryReturn { // FIXME: how to support TLS in library mode? - let tls = Box::new(tls::Tls::new()); + // We use the System allocator here such that the global allocator may use + // thread-locals. 
+ let tls = Box::new_in(tls::Tls::new(), System); let tls_guard = unsafe { tls.activate() }; if secondary { diff --git a/library/std/src/sys/pal/sgx/abi/tls/mod.rs b/library/std/src/sys/pal/sgx/abi/tls/mod.rs index 41e38b6961680..553814dcb5fda 100644 --- a/library/std/src/sys/pal/sgx/abi/tls/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/tls/mod.rs @@ -89,13 +89,6 @@ impl Tls { ActiveTls { tls: self } } - #[allow(unused)] - pub unsafe fn activate_persistent(self: Box) { - // FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition. - let ptr = Box::into_raw(self).cast_const().cast::(); - unsafe { set_tls_ptr(ptr) }; - } - unsafe fn current<'a>() -> &'a Tls { // FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition. unsafe { &*(get_tls_ptr() as *const Tls) } diff --git a/library/std/src/sys/thread/sgx.rs b/library/std/src/sys/thread/sgx.rs index f20ef7d86b9c7..9e6dcfa16713d 100644 --- a/library/std/src/sys/thread/sgx.rs +++ b/library/std/src/sys/thread/sgx.rs @@ -2,6 +2,7 @@ use crate::io; use crate::sys::pal::abi::{thread, usercalls}; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread(task_queue::JoinHandle); @@ -13,6 +14,7 @@ pub use self::task_queue::JoinNotifier; mod task_queue { use super::wait_notify; use crate::sync::{Mutex, MutexGuard}; + use crate::thread::ThreadInit; pub type JoinHandle = wait_notify::Waiter; @@ -25,19 +27,20 @@ mod task_queue { } pub(super) struct Task { - p: Box, + init: Box, done: JoinNotifier, } impl Task { - pub(super) fn new(p: Box) -> (Task, JoinHandle) { + pub(super) fn new(init: Box) -> (Task, JoinHandle) { let (done, recv) = wait_notify::new(); let done = JoinNotifier(Some(done)); - (Task { p, done }, recv) + (Task { init, done }, recv) } pub(super) fn run(self) -> JoinNotifier { - (self.p)(); + let rust_start = self.init.init(); + rust_start(); self.done } } @@ -93,14 +96,10 @@ pub mod wait_notify { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for 
safety requirements - pub unsafe fn new( - _stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(_stack: usize, init: Box) -> io::Result { let mut queue_lock = task_queue::lock(); unsafe { usercalls::launch_thread()? }; - let (task, handle) = task_queue::Task::new(p); + let (task, handle) = task_queue::Task::new(init); queue_lock.push(task); Ok(Thread(handle)) } diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index a806261a7c393..983d189b07024 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -218,13 +218,13 @@ pub mod local_impl { /// the global allocator works. pub(crate) struct ThreadInit { pub handle: Thread, - pub rust_start: Box, + pub rust_start: Box, } impl ThreadInit { /// Initialize the 'current thread' mechanism on this thread, returning the /// Rust entry point. - pub fn init(self: Box) -> Box { + pub fn init(self: Box) -> Box { // Set the current thread before any (de)allocations on the global allocator occur, // so that it may call std::thread::current() in its implementation. This is also // why we take Box, to ensure the Box is not destroyed until after this point. 
From 9b5bb75875bccbfbda0b746362b711d348c24a95 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 3 Oct 2025 01:04:49 +0200 Subject: [PATCH 08/12] Add inline(never) to prevent reordering after TLS has been destroyed --- library/std/src/sys/thread/xous.rs | 14 ++++++++++++-- library/std/src/sys/thread_local/key/xous.rs | 5 +++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/library/std/src/sys/thread/xous.rs b/library/std/src/sys/thread/xous.rs index 28f751337a235..6c2cdfa4acddf 100644 --- a/library/std/src/sys/thread/xous.rs +++ b/library/std/src/sys/thread/xous.rs @@ -69,6 +69,12 @@ impl Thread { ) .map_err(|code| io::Error::from_raw_os_error(code as i32))?; + #[inline(never)] + fn run_main_thread_not_inlined(init: Box) { + let rust_start = init.init(); + rust_start(); + } + extern "C" fn thread_start( data: *mut usize, guard_page_pre: usize, @@ -76,8 +82,12 @@ impl Thread { ) -> ! { // SAFETY: we are simply recreating the box that was leaked earlier. let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; - let rust_start = init.init(); - rust_start(); + + // Run the main thread with an inline(never) barrier to prevent + // dealloc calls from being reordered to after the TLS has been destroyed. + // See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950 + // for more context. + run_main_thread_not_inlined(init); // Destroy TLS, which will free the TLS page and call the destructor for // any thread local storage (if any). diff --git a/library/std/src/sys/thread_local/key/xous.rs b/library/std/src/sys/thread_local/key/xous.rs index 529178f34757b..db83d2bf4a13a 100644 --- a/library/std/src/sys/thread_local/key/xous.rs +++ b/library/std/src/sys/thread_local/key/xous.rs @@ -186,6 +186,11 @@ pub unsafe fn destroy_tls() { }; } +// This is marked inline(never) to prevent dealloc calls from being reordered +// to after the TLS has been destroyed. 
+// See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950 +// for more context. +#[inline(never)] unsafe fn run_dtors() { let mut any_run = true; From ff8ed14a3fee2e75e79f060f5ee7b77e45597697 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 3 Oct 2025 01:34:09 +0200 Subject: [PATCH 09/12] Expand test with allocating from thread-local destructors --- .../threads-sendsync/tls-in-global-alloc.rs | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tests/ui/threads-sendsync/tls-in-global-alloc.rs b/tests/ui/threads-sendsync/tls-in-global-alloc.rs index 82a96ce9a6c3a..424e43ebccca0 100644 --- a/tests/ui/threads-sendsync/tls-in-global-alloc.rs +++ b/tests/ui/threads-sendsync/tls-in-global-alloc.rs @@ -2,27 +2,46 @@ //@ needs-threads use std::alloc::{GlobalAlloc, Layout, System}; -use std::thread::Thread; -use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering}; +use std::hint::black_box; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::thread::{Thread, ThreadId}; static GLOBAL: AtomicUsize = AtomicUsize::new(0); +static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); +static LOCAL_TRY_WITH_SUCCEEDED_ALLOC: AtomicBool = AtomicBool::new(false); +static LOCAL_TRY_WITH_SUCCEEDED_DEALLOC: AtomicBool = AtomicBool::new(false); -struct Local(Thread); +struct LocalForAllocatorWithoutDrop(ThreadId); -thread_local! { - static LOCAL: Local = { - GLOBAL.fetch_or(1, Ordering::Relaxed); - Local(std::thread::current()) - }; -} +struct LocalForAllocatorWithDrop(Thread); -impl Drop for Local { +impl Drop for LocalForAllocatorWithDrop { fn drop(&mut self) { GLOBAL.fetch_or(2, Ordering::Relaxed); } } -static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); +struct LocalForUser(u32); + +impl Drop for LocalForUser { + // A user might call the global allocator in a thread-local drop. 
+ fn drop(&mut self) { + self.0 += 1; + drop(black_box(Box::new(self.0))) + } +} + +thread_local! { + static LOCAL_FOR_USER0: LocalForUser = LocalForUser(0); + static LOCAL_FOR_ALLOCATOR_WITHOUT_DROP: LocalForAllocatorWithoutDrop = { + LocalForAllocatorWithoutDrop(std::thread::current().id()) + }; + static LOCAL_FOR_ALLOCATOR_WITH_DROP: LocalForAllocatorWithDrop = { + GLOBAL.fetch_or(1, Ordering::Relaxed); + LocalForAllocatorWithDrop(std::thread::current()) + }; + static LOCAL_FOR_USER1: LocalForUser = LocalForUser(1); +} #[global_allocator] static ALLOC: Alloc = Alloc; @@ -33,9 +52,19 @@ unsafe impl GlobalAlloc for Alloc { // Make sure we aren't re-entrant. assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); - LOCAL.with(|local| { + + // Should be infallible. + LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| { + assert!(local.0 == std::thread::current().id()); + }); + + // May fail once thread-local destructors start running, and ours has + // been ran. + let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| { assert!(local.0.id() == std::thread::current().id()); }); + LOCAL_TRY_WITH_SUCCEEDED_ALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed); + let ret = unsafe { System.alloc(layout) }; SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); ret @@ -45,9 +74,19 @@ unsafe impl GlobalAlloc for Alloc { // Make sure we aren't re-entrant. assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); - LOCAL.with(|local| { + + // Should be infallible. + LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| { + assert!(local.0 == std::thread::current().id()); + }); + + // May fail once thread-local destructors start running, and ours has + // been ran. 
+ let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| { assert!(local.0.id() == std::thread::current().id()); }); + LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed); + unsafe { System.dealloc(ptr, layout) } SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); } @@ -55,10 +94,14 @@ unsafe impl GlobalAlloc for Alloc { fn main() { std::thread::spawn(|| { + LOCAL_FOR_USER0.with(|l| assert!(l.0 == 0)); std::hint::black_box(vec![1, 2]); assert!(GLOBAL.load(Ordering::Relaxed) == 1); + LOCAL_FOR_USER1.with(|l| assert!(l.0 == 1)); }) .join() .unwrap(); assert!(GLOBAL.load(Ordering::Relaxed) == 3); + assert!(LOCAL_TRY_WITH_SUCCEEDED_ALLOC.load(Ordering::Relaxed)); + assert!(LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.load(Ordering::Relaxed)); } From 4ecf505eb765a97f55666bf11fd7d1c1cce6b31f Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 28 Nov 2025 00:47:36 +0100 Subject: [PATCH 10/12] Silence dead code warning for ThreadInit::init --- library/std/src/sys/thread/unsupported.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/library/std/src/sys/thread/unsupported.rs b/library/std/src/sys/thread/unsupported.rs index 2d8524f825499..d633e83371eae 100644 --- a/library/std/src/sys/thread/unsupported.rs +++ b/library/std/src/sys/thread/unsupported.rs @@ -4,6 +4,12 @@ use crate::num::NonZero; use crate::thread::ThreadInit; use crate::time::Duration; +// Silence dead code warnings for the otherwise unused ThreadInit::init() call. 
+#[expect(dead_code)] +fn dummy_init_call(init: Box) { + drop(init.init()); +} + pub struct Thread(!); pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; From 48fc0fd597154577c6f7b013277183a2bf81c83f Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 28 Nov 2025 02:12:54 +0100 Subject: [PATCH 11/12] Add missing feature flag --- library/std/src/sys/thread/wasip1.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/library/std/src/sys/thread/wasip1.rs b/library/std/src/sys/thread/wasip1.rs index 6932691b74390..9287a9c5485cc 100644 --- a/library/std/src/sys/thread/wasip1.rs +++ b/library/std/src/sys/thread/wasip1.rs @@ -7,6 +7,7 @@ use crate::mem; use crate::num::NonZero; #[cfg(target_feature = "atomics")] use crate::sys::os; +#[cfg(target_feature = "atomics")] use crate::thread::ThreadInit; use crate::time::Duration; #[cfg(target_feature = "atomics")] From 492fbc56c18fe29fdef23d1525440f0f95d22b9a Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 28 Nov 2025 23:35:38 +0100 Subject: [PATCH 12/12] Update debuginfo test from Global to System --- tests/debuginfo/thread.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/debuginfo/thread.rs b/tests/debuginfo/thread.rs index 5120da62f2ab9..4274e449f1c9d 100644 --- a/tests/debuginfo/thread.rs +++ b/tests/debuginfo/thread.rs @@ -14,7 +14,7 @@ // //@ cdb-command:dx t,d //@ cdb-check:t,d : [...] [Type: std::thread::Thread *] -//@ cdb-check:[...] inner [...][Type: core::pin::Pin >] +//@ cdb-check:[...] inner [...][Type: core::pin::Pin >] use std::thread;