diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs
index 5a2f27d52a98..0f52b08361bb 100644
--- a/cranelift/jit/src/backend.rs
+++ b/cranelift/jit/src/backend.rs
@@ -1,6 +1,9 @@
 //! Defines `JITModule`.
 
-use crate::{compiled_blob::CompiledBlob, memory::BranchProtection, memory::Memory};
+use crate::{
+    compiled_blob::CompiledBlob,
+    memory::{BranchProtection, JITMemoryProvider, SystemMemoryProvider},
+};
 use cranelift_codegen::binemit::Reloc;
 use cranelift_codegen::isa::{OwnedTargetIsa, TargetIsa};
 use cranelift_codegen::settings::Configurable;
@@ -28,6 +31,7 @@ pub struct JITBuilder {
     symbols: HashMap<String, SendWrapper<*const u8>>,
     lookup_symbols: Vec<Box<dyn Fn(&str) -> Option<*const u8> + Send>>,
     libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>,
+    memory: Option<Box<dyn JITMemoryProvider>>,
 }
 
 impl JITBuilder {
@@ -91,6 +95,7 @@ impl JITBuilder {
             symbols,
             lookup_symbols,
             libcall_names,
+            memory: None,
         }
     }
 
@@ -141,6 +146,14 @@ impl JITBuilder {
         self.lookup_symbols.push(symbol_lookup_fn);
         self
     }
+
+    /// Set the memory provider for the module.
+    ///
+    /// If unset, defaults to [`SystemMemoryProvider`].
+    pub fn memory_provider(&mut self, provider: Box<dyn JITMemoryProvider>) -> &mut Self {
+        self.memory = Some(provider);
+        self
+    }
 }
 
 /// A wrapper that impls Send for the contents.
@@ -159,7 +172,7 @@ pub struct JITModule {
     symbols: RefCell<HashMap<String, SendWrapper<*const u8>>>,
     lookup_symbols: Vec<Box<dyn Fn(&str) -> Option<*const u8> + Send>>,
     libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>,
-    memory: MemoryHandle,
+    memory: Box<dyn JITMemoryProvider>,
     declarations: ModuleDeclarations,
     compiled_functions: SecondaryMap<FuncId, Option<CompiledBlob>>,
     compiled_data_objects: SecondaryMap<DataId, Option<CompiledBlob>>,
@@ -167,13 +180,6 @@ pub struct JITModule {
     data_objects_to_finalize: Vec<DataId>,
 }
 
-/// A handle to allow freeing memory allocated by the `Module`.
-struct MemoryHandle {
-    code: Memory,
-    readonly: Memory,
-    writable: Memory,
-}
-
 impl JITModule {
     /// Free memory allocated for code and data segments of compiled functions.
     ///
@@ -184,9 +190,7 @@ impl JITModule {
     /// from that module are currently executing and none of the `fn` pointers
     /// are called afterwards.
     pub unsafe fn free_memory(mut self) {
-        self.memory.code.free_memory();
-        self.memory.readonly.free_memory();
-        self.memory.writable.free_memory();
+        self.memory.free_memory();
     }
 
     fn lookup_symbol(&self, name: &str) -> Option<*const u8> {
@@ -325,8 +329,12 @@ impl JITModule {
         }
 
         // Now that we're done patching, prepare the memory for execution!
-        self.memory.readonly.set_readonly()?;
-        self.memory.code.set_readable_and_executable()?;
+        let branch_protection = if cfg!(target_arch = "aarch64") && use_bti(&self.isa.isa_flags()) {
+            BranchProtection::BTI
+        } else {
+            BranchProtection::None
+        };
+        self.memory.finalize(branch_protection)?;
 
         Ok(())
     }
@@ -338,23 +346,15 @@ impl JITModule {
             "cranelift-jit needs is_pic=false"
         );
 
-        let branch_protection =
-            if cfg!(target_arch = "aarch64") && use_bti(&builder.isa.isa_flags()) {
-                BranchProtection::BTI
-            } else {
-                BranchProtection::None
-            };
+        let memory = builder
+            .memory
+            .unwrap_or_else(|| Box::new(SystemMemoryProvider::new()));
         Self {
             isa: builder.isa,
             symbols: RefCell::new(builder.symbols),
             lookup_symbols: builder.lookup_symbols,
             libcall_names: builder.libcall_names,
-            memory: MemoryHandle {
-                code: Memory::new(branch_protection),
-                // Branch protection is not applicable to non-executable memory.
-                readonly: Memory::new(BranchProtection::None),
-                writable: Memory::new(BranchProtection::None),
-            },
+            memory,
             declarations: ModuleDeclarations::default(),
             compiled_functions: SecondaryMap::new(),
             compiled_data_objects: SecondaryMap::new(),
@@ -436,15 +436,16 @@ impl Module for JITModule {
         let compiled_code = ctx.compiled_code().unwrap();
 
         let size = compiled_code.code_info().total_size as usize;
-        let align = alignment.max(self.isa.symbol_alignment());
-        let ptr = self
-            .memory
-            .code
-            .allocate(size, align)
-            .map_err(|e| ModuleError::Allocation {
-                message: "unable to alloc function",
-                err: e,
-            })?;
+        let align = alignment
+            .max(self.isa.function_alignment().minimum as u64)
+            .max(self.isa.symbol_alignment());
+        let ptr =
+            self.memory
+                .allocate_readexec(size, align)
+                .map_err(|e| ModuleError::Allocation {
+                    message: "unable to alloc function",
+                    err: e,
+                })?;
 
         {
             let mem = unsafe { std::slice::from_raw_parts_mut(ptr, size) };
@@ -488,15 +489,16 @@ impl Module for JITModule {
         }
 
         let size = bytes.len();
-        let align = alignment.max(self.isa.symbol_alignment());
-        let ptr = self
-            .memory
-            .code
-            .allocate(size, align)
-            .map_err(|e| ModuleError::Allocation {
-                message: "unable to alloc function bytes",
-                err: e,
-            })?;
+        let align = alignment
+            .max(self.isa.function_alignment().minimum as u64)
+            .max(self.isa.symbol_alignment());
+        let ptr =
+            self.memory
+                .allocate_readexec(size, align)
+                .map_err(|e| ModuleError::Allocation {
+                    message: "unable to alloc function bytes",
+                    err: e,
+                })?;
 
         unsafe {
             ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, size);
@@ -548,16 +550,14 @@ impl Module for JITModule {
 
         let ptr = if decl.writable {
             self.memory
-                .writable
-                .allocate(alloc_size, align.unwrap_or(WRITABLE_DATA_ALIGNMENT))
+                .allocate_readwrite(alloc_size, align.unwrap_or(WRITABLE_DATA_ALIGNMENT))
                 .map_err(|e| ModuleError::Allocation {
                     message: "unable to alloc writable data",
                     err: e,
                 })?
         } else {
             self.memory
-                .readonly
-                .allocate(alloc_size, align.unwrap_or(READONLY_DATA_ALIGNMENT))
+                .allocate_readonly(alloc_size, align.unwrap_or(READONLY_DATA_ALIGNMENT))
                 .map_err(|e| ModuleError::Allocation {
                     message: "unable to alloc readonly data",
                    err: e,
diff --git a/cranelift/jit/src/lib.rs b/cranelift/jit/src/lib.rs
index a7e4190cd12f..838668e1a9e7 100644
--- a/cranelift/jit/src/lib.rs
+++ b/cranelift/jit/src/lib.rs
@@ -10,6 +10,9 @@ mod compiled_blob;
 mod memory;
 
 pub use crate::backend::{JITBuilder, JITModule};
+pub use crate::memory::{
+    ArenaMemoryProvider, BranchProtection, JITMemoryProvider, SystemMemoryProvider,
+};
 
 /// Version number of this crate.
 pub const VERSION: &str = env!("CARGO_PKG_VERSION");
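Annotation (not part of the patch): with these hunks applied, every allocation and protection change in `JITModule` is routed through one `JITMemoryProvider`, and opting into a custom provider is a single builder call. A minimal sketch of embedder-side usage, assuming the `memory_provider` method and the re-exports added above plus `anyhow` for error plumbing; everything else is the crate's existing API:

```rust
use cranelift_jit::{ArenaMemoryProvider, JITBuilder, JITModule};
use cranelift_module::default_libcall_names;

fn arena_backed_module() -> anyhow::Result<JITModule> {
    // Reserve 256 MiB of address space up front; code and data then keep
    // stable addresses for the lifetime of the module.
    let provider = ArenaMemoryProvider::new_with_size(256 << 20)?;

    let mut builder = JITBuilder::new(default_libcall_names())?;
    builder.memory_provider(Box::new(provider));

    // Defining functions and data routes through the provider:
    //   define_function / define_function_bytes -> allocate_readexec
    //   define_data (writable)                  -> allocate_readwrite
    //   define_data (read-only)                 -> allocate_readonly
    // finalize_definitions() then calls JITMemoryProvider::finalize, and
    // the unsafe free_memory() hands the whole region back.
    Ok(JITModule::new(builder))
}
```

If `memory_provider` is never called, `JITModule::new` falls back to `SystemMemoryProvider`, preserving the crate's previous behavior.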
diff --git a/cranelift/jit/src/memory/arena.rs b/cranelift/jit/src/memory/arena.rs
new file mode 100644
index 000000000000..5a598fcfccb9
--- /dev/null
+++ b/cranelift/jit/src/memory/arena.rs
@@ -0,0 +1,284 @@
+use std::io;
+use std::mem::ManuallyDrop;
+use std::ptr;
+
+use cranelift_module::ModuleResult;
+
+use super::{BranchProtection, JITMemoryProvider};
+
+fn align_up(addr: usize, align: usize) -> usize {
+    debug_assert!(align.is_power_of_two());
+    (addr + align - 1) & !(align - 1)
+}
+
+#[derive(Debug)]
+struct Segment {
+    ptr: *mut u8,
+    len: usize,
+    position: usize,
+    target_prot: region::Protection,
+    finalized: bool,
+}
+
+impl Segment {
+    fn new(ptr: *mut u8, len: usize, target_prot: region::Protection) -> Self {
+        // Segments are created on page boundaries.
+        debug_assert_eq!(ptr as usize % region::page::size(), 0);
+        debug_assert_eq!(len % region::page::size(), 0);
+        let mut segment = Segment {
+            ptr,
+            len,
+            target_prot,
+            position: 0,
+            finalized: false,
+        };
+        // Set the segment to read-write for initialization. The target
+        // permissions will be applied in `finalize`.
+        segment.set_rw();
+        segment
+    }
+
+    fn set_rw(&mut self) {
+        unsafe {
+            region::protect(self.ptr, self.len, region::Protection::READ_WRITE)
+                .expect("unable to change memory protection for jit memory segment");
+        }
+    }
+
+    fn finalize(&mut self, branch_protection: BranchProtection) {
+        if self.finalized {
+            return;
+        }
+
+        // Executable regions are handled separately to correctly deal with
+        // branch protection and cache coherence.
+        if self.target_prot == region::Protection::READ_EXECUTE {
+            super::set_readable_and_executable(self.ptr, self.len, branch_protection)
+                .expect("unable to set memory protection for jit memory segment");
+        } else {
+            unsafe {
+                region::protect(self.ptr, self.len, self.target_prot)
+                    .expect("unable to change memory protection for jit memory segment");
+            }
+        }
+        self.finalized = true;
+    }
+
+    // Note: We do pointer arithmetic on the `ptr` passed to `Segment::new` here.
+    // This assumes that `ptr` is valid for `len` bytes, or will result in UB.
+    fn allocate(&mut self, size: usize, align: usize) -> *mut u8 {
+        assert!(self.has_space_for(size, align));
+        self.position = align_up(self.position, align);
+        let ptr = unsafe { self.ptr.add(self.position) };
+        self.position += size;
+        ptr
+    }
+
+    fn has_space_for(&self, size: usize, align: usize) -> bool {
+        !self.finalized && align_up(self.position, align) + size <= self.len
+    }
+}
+
+/// `ArenaMemoryProvider` allocates segments from a contiguous memory region
+/// that is reserved up-front.
+///
+/// The arena's memory is initially allocated with PROT_NONE and gradually
+/// updated as the JIT requires more space. This approach allows for stable
+/// addresses throughout the lifetime of the JIT.
+///
+/// Depending on the underlying platform, requesting large parts of the address
+/// space to be allocated might fail. This implementation currently doesn't do
+/// overcommit on Windows.
+///
+/// Note: Memory will be leaked by default unless
+/// [`JITMemoryProvider::free_memory`] is called to ensure function pointers
+/// remain valid for the remainder of the program's life.
+pub struct ArenaMemoryProvider {
+    alloc: ManuallyDrop<Option<region::Allocation>>,
+    ptr: *mut u8,
+    size: usize,
+    position: usize,
+    segments: Vec<Segment>,
+}
+
+impl ArenaMemoryProvider {
+    /// Create a new memory region with the given size.
+    pub fn new_with_size(reserve_size: usize) -> Result<Self, region::Error> {
+        let size = align_up(reserve_size, region::page::size());
+        // Note: The region crate uses `MEM_RESERVE | MEM_COMMIT` on Windows.
+        // This means that allocations that exceed the page file plus system
+        // memory will fail here.
+        // https://github.com/darfink/region-rs/pull/34
+        let mut alloc = region::alloc(size, region::Protection::NONE)?;
+        let ptr = alloc.as_mut_ptr();
+
+        Ok(Self {
+            alloc: ManuallyDrop::new(Some(alloc)),
+            segments: Vec::new(),
+            ptr,
+            size,
+            position: 0,
+        })
+    }
+
+    fn allocate(
+        &mut self,
+        size: usize,
+        align: u64,
+        protection: region::Protection,
+    ) -> io::Result<*mut u8> {
+        let align = usize::try_from(align).expect("alignment too big");
+        assert!(
+            align <= region::page::size(),
+            "alignment over page size is not supported"
+        );
+
+        // Note: Add a fast path without a linear scan over segments here?
+
+        // Can we fit this allocation into an existing segment?
+        if let Some(segment) = self.segments.iter_mut().find(|seg| {
+            seg.target_prot == protection && !seg.finalized && seg.has_space_for(size, align)
+        }) {
+            return Ok(segment.allocate(size, align));
+        }
+
+        // Can we resize the last segment?
+        if let Some(segment) = self.segments.iter_mut().last() {
+            if segment.target_prot == protection && !segment.finalized {
+                let additional_size = align_up(size, region::page::size());
+
+                // If our reserved arena can fit the additional size, extend the
+                // last segment.
+                if self.position + additional_size <= self.size {
+                    segment.len += additional_size;
+                    segment.set_rw();
+                    self.position += additional_size;
+                    return Ok(segment.allocate(size, align));
+                }
+            }
+        }
+
+        // Allocate a new segment for the given size and alignment.
+        self.allocate_segment(size, protection)?;
+        let i = self.segments.len() - 1;
+        Ok(self.segments[i].allocate(size, align))
+    }
+
+    fn allocate_segment(
+        &mut self,
+        size: usize,
+        target_prot: region::Protection,
+    ) -> Result<(), io::Error> {
+        let size = align_up(size, region::page::size());
+        let ptr = unsafe { self.ptr.add(self.position) };
+        if self.position + size > self.size {
+            return Err(io::Error::new(
+                io::ErrorKind::Other,
+                "pre-allocated jit memory region exhausted",
+            ));
+        }
+        self.position += size;
+        self.segments.push(Segment::new(ptr, size, target_prot));
+        Ok(())
+    }
+
+    pub(crate) fn finalize(&mut self, branch_protection: BranchProtection) {
+        for segment in &mut self.segments {
+            segment.finalize(branch_protection);
+        }
+
+        // Flush any in-flight instructions from the pipeline
+        wasmtime_jit_icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush");
+    }
+
+    /// Frees the allocated memory region, which would be leaked otherwise.
+    /// Likely to invalidate existing function pointers, causing unsafety.
+    pub(crate) unsafe fn free_memory(&mut self) {
+        if self.ptr == ptr::null_mut() {
+            return;
+        }
+        self.segments.clear();
+        // Drop the allocation, freeing the memory.
+        let _: Option<region::Allocation> = self.alloc.take();
+        self.ptr = ptr::null_mut();
+    }
+}
+
+impl Drop for ArenaMemoryProvider {
+    fn drop(&mut self) {
+        if self.ptr == ptr::null_mut() {
+            return;
+        }
+        let is_live = self.segments.iter().any(|seg| seg.finalized);
+        if !is_live {
+            // Only free the memory if it hasn't been finalized yet.
+            // Otherwise, leak it, since JIT memory may still be in use.
+            unsafe { self.free_memory() };
+        }
+    }
+}
+
+impl JITMemoryProvider for ArenaMemoryProvider {
+    fn allocate_readexec(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
+        self.allocate(size, align, region::Protection::READ_EXECUTE)
+    }
+
+    fn allocate_readwrite(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
+        self.allocate(size, align, region::Protection::READ_WRITE)
+    }
+
+    fn allocate_readonly(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
+        self.allocate(size, align, region::Protection::READ)
+    }
+
+    unsafe fn free_memory(&mut self) {
+        self.free_memory();
+    }
+
+    fn finalize(&mut self, branch_protection: BranchProtection) -> ModuleResult<()> {
+        self.finalize(branch_protection);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn alignment_ok() {
+        let mut arena = ArenaMemoryProvider::new_with_size(1 << 20).unwrap();
+
+        for align_log2 in 0..8 {
+            let align = 1usize << align_log2;
+            for size in 1..128 {
+                let ptr = arena.allocate_readwrite(size, align as u64).unwrap();
+                // assert!(ptr.is_aligned_to(align));
+                assert_eq!(ptr.addr() % align, 0);
+            }
+        }
+    }
+
+    #[test]
+    #[cfg(all(target_pointer_width = "64", not(target_os = "windows")))]
+    // Windows: See https://github.com/darfink/region-rs/pull/34
+    fn large_virtual_allocation() {
+        // We should be able to request 1 TiB of virtual address space on
+        // 64-bit platforms. Physical memory should be committed as we go.
+        let reserve_size = 1 << 40;
+        let mut arena = ArenaMemoryProvider::new_with_size(reserve_size).unwrap();
+        let ptr = arena.allocate_readwrite(1, 1).unwrap();
+        assert_eq!(ptr.addr(), arena.ptr.addr());
+        arena.finalize(BranchProtection::None);
+        unsafe { ptr.write_volatile(42) };
+        unsafe { arena.free_memory() };
+    }
+
+    #[test]
+    fn over_capacity() {
+        let mut arena = ArenaMemoryProvider::new_with_size(1 << 20).unwrap(); // 1 MiB
+
+        let _ = arena.allocate_readwrite(900_000, 1).unwrap();
+        let _ = arena.allocate_readwrite(200_000, 1).unwrap_err();
+    }
+}
diff --git a/cranelift/jit/src/memory/mod.rs b/cranelift/jit/src/memory/mod.rs
new file mode 100644
index 000000000000..9052d2da56e6
--- /dev/null
+++ b/cranelift/jit/src/memory/mod.rs
@@ -0,0 +1,80 @@
+use cranelift_module::{ModuleError, ModuleResult};
+use std::io;
+
+mod arena;
+mod system;
+
+pub use arena::ArenaMemoryProvider;
+pub use system::SystemMemoryProvider;
+
+/// Type of branch protection to apply to executable memory.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum BranchProtection {
+    /// No protection.
+    None,
+    /// Use the Branch Target Identification extension of the Arm architecture.
+    BTI,
+}
+
+/// A provider of memory for the JIT.
+pub trait JITMemoryProvider {
+    /// Allocate memory that will be executable once finalized.
+    fn allocate_readexec(&mut self, size: usize, align: u64) -> io::Result<*mut u8>;
+    /// Allocate writable memory.
+    fn allocate_readwrite(&mut self, size: usize, align: u64) -> io::Result<*mut u8>;
+    /// Allocate memory that will be read-only once finalized.
+    fn allocate_readonly(&mut self, size: usize, align: u64) -> io::Result<*mut u8>;
+
+    /// Free the memory region.
+    unsafe fn free_memory(&mut self);
+    /// Finalize the memory region and apply memory protections.
+    fn finalize(&mut self, branch_protection: BranchProtection) -> ModuleResult<()>;
+}
+
+/// Marks the memory region as readable and executable.
+///
+/// This function deals with applying branch protection and clearing the
+/// icache, but *doesn't* flush the pipeline. Callers have to ensure that
+/// [`wasmtime_jit_icache_coherence::pipeline_flush_mt`] is called before the
+/// mappings are used.
+pub(crate) fn set_readable_and_executable(
+    ptr: *mut u8,
+    len: usize,
+    branch_protection: BranchProtection,
+) -> ModuleResult<()> {
+    // Clear all the newly allocated code from cache if the processor requires it.
+    //
+    // Do this before marking the memory as R+X; technically we should be able
+    // to do it after, but there are some CPUs that have had errata about doing
+    // this with read-only memory.
+    unsafe {
+        wasmtime_jit_icache_coherence::clear_cache(ptr as *const libc::c_void, len)
+            .expect("Failed cache clear")
+    };
+
+    unsafe {
+        region::protect(ptr, len, region::Protection::READ_EXECUTE).map_err(|e| {
+            ModuleError::Backend(
+                anyhow::Error::new(e).context("unable to make memory readable+executable"),
+            )
+        })?;
+    }
+
+    // If BTI is requested, and the architecture supports it, use mprotect to
+    // set the PROT_BTI flag.
+    if branch_protection == BranchProtection::BTI {
+        #[cfg(all(target_arch = "aarch64", target_os = "linux"))]
+        if std::arch::is_aarch64_feature_detected!("bti") {
+            let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10;
+
+            unsafe {
+                if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 {
+                    return Err(ModuleError::Backend(
+                        anyhow::Error::new(io::Error::last_os_error())
+                            .context("unable to make memory readable+executable"),
+                    ));
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
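Annotation (not part of the patch): the trait above is small enough that instrumenting or replacing the allocation policy is straightforward. A hypothetical wrapper (names of my choosing) that counts requested bytes while delegating the real work to `SystemMemoryProvider`, matching the trait exactly as declared in this file:

```rust
use std::io;

use cranelift_jit::{BranchProtection, JITMemoryProvider, SystemMemoryProvider};
use cranelift_module::ModuleResult;

/// Hypothetical provider that tracks how much memory of each kind the JIT
/// requests while delegating allocation, finalization, and freeing.
struct CountingProvider {
    inner: SystemMemoryProvider,
    code_bytes: usize,
    readonly_bytes: usize,
    writable_bytes: usize,
}

impl CountingProvider {
    fn new() -> Self {
        Self {
            inner: SystemMemoryProvider::new(),
            code_bytes: 0,
            readonly_bytes: 0,
            writable_bytes: 0,
        }
    }

    /// Report the allocation totals observed so far.
    fn report(&self) {
        println!(
            "code: {} B, read-only: {} B, writable: {} B",
            self.code_bytes, self.readonly_bytes, self.writable_bytes
        );
    }
}

impl JITMemoryProvider for CountingProvider {
    fn allocate_readexec(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
        self.code_bytes += size;
        self.inner.allocate_readexec(size, align)
    }

    fn allocate_readwrite(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
        self.writable_bytes += size;
        self.inner.allocate_readwrite(size, align)
    }

    fn allocate_readonly(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
        self.readonly_bytes += size;
        self.inner.allocate_readonly(size, align)
    }

    unsafe fn free_memory(&mut self) {
        self.inner.free_memory()
    }

    fn finalize(&mut self, branch_protection: BranchProtection) -> ModuleResult<()> {
        self.inner.finalize(branch_protection)
    }
}
```

Because `JITBuilder::memory_provider` takes a `Box<dyn JITMemoryProvider>`, such a wrapper drops in without further changes to the module setup.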
diff --git a/cranelift/jit/src/memory.rs b/cranelift/jit/src/memory/system.rs
similarity index 74%
rename from cranelift/jit/src/memory.rs
rename to cranelift/jit/src/memory/system.rs
index 3e005716d26d..715c155f50f3 100644
--- a/cranelift/jit/src/memory.rs
+++ b/cranelift/jit/src/memory/system.rs
@@ -5,11 +5,12 @@
 use memmap2::MmapMut;
 #[cfg(not(any(feature = "selinux-fix", windows)))]
 use std::alloc;
-use std::ffi::c_void;
 use std::io;
 use std::mem;
 use std::ptr;
-use wasmtime_jit_icache_coherence as icache_coherence;
+
+use super::BranchProtection;
+use super::JITMemoryProvider;
 
 /// A simple struct consisting of a pointer and length.
 struct PtrLen {
@@ -111,15 +112,6 @@ impl Drop for PtrLen {
 
 // TODO: add a `Drop` impl for `cfg(target_os = "windows")`
 
-/// Type of branch protection to apply to executable memory.
-#[derive(Clone, Debug, PartialEq)]
-pub(crate) enum BranchProtection {
-    /// No protection.
-    None,
-    /// Use the Branch Target Identification extension of the Arm architecture.
-    BTI,
-}
-
 /// JIT memory manager. This manages pages of suitably aligned and
 /// accessible memory. Memory will be leaked by default to have
 /// function pointers remain valid for the remainder of the
@@ -129,19 +121,17 @@ pub(crate) struct Memory {
     allocations: Vec<PtrLen>,
     already_protected: usize,
     current: PtrLen,
     position: usize,
-    branch_protection: BranchProtection,
 }
 
 unsafe impl Send for Memory {}
 
 impl Memory {
-    pub(crate) fn new(branch_protection: BranchProtection) -> Self {
+    pub(crate) fn new() -> Self {
         Self {
             allocations: Vec::new(),
             already_protected: 0,
             current: PtrLen::new(),
             position: 0,
-            branch_protection,
         }
     }
 
@@ -175,55 +165,18 @@ impl Memory {
     }
 
     /// Set all memory allocated in this `Memory` up to now as readable and executable.
-    pub(crate) fn set_readable_and_executable(&mut self) -> ModuleResult<()> {
+    pub(crate) fn set_readable_and_executable(
+        &mut self,
+        branch_protection: BranchProtection,
+    ) -> ModuleResult<()> {
         self.finish_current();
 
-        // Clear all the newly allocated code from cache if the processor requires it
-        //
-        // Do this before marking the memory as R+X, technically we should be able to do it after
-        // but there are some CPU's that have had errata about doing this with read only memory.
         for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
-            unsafe {
-                icache_coherence::clear_cache(ptr as *const c_void, len)
-                    .expect("Failed cache clear")
-            };
-        }
-
-        let set_region_readable_and_executable = |ptr, len| -> ModuleResult<()> {
-            if self.branch_protection == BranchProtection::BTI {
-                #[cfg(all(target_arch = "aarch64", target_os = "linux"))]
-                if std::arch::is_aarch64_feature_detected!("bti") {
-                    let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10;
-
-                    unsafe {
-                        if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 {
-                            return Err(ModuleError::Backend(
-                                anyhow::Error::new(io::Error::last_os_error())
-                                    .context("unable to make memory readable+executable"),
-                            ));
-                        }
-                    }
-
-                    return Ok(());
-                }
-            }
-
-            unsafe {
-                region::protect(ptr, len, region::Protection::READ_EXECUTE).map_err(|e| {
-                    ModuleError::Backend(
-                        anyhow::Error::new(e).context("unable to make memory readable+executable"),
-                    )
-                })?;
-            }
-            Ok(())
-        };
-
-        for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
-            set_region_readable_and_executable(ptr, len)?;
+            super::set_readable_and_executable(ptr, len, branch_protection)?;
         }
 
         // Flush any in-flight instructions from the pipeline
-        icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush");
+        wasmtime_jit_icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush");
 
         self.already_protected = self.allocations.len();
         Ok(())
@@ -274,3 +227,51 @@ impl Drop for Memory {
             .for_each(mem::forget);
     }
 }
+
+/// A memory provider that allocates memory on-demand using the system
+/// allocator.
+///
+/// Note: Memory will be leaked by default unless
+/// [`JITMemoryProvider::free_memory`] is called to ensure function pointers
+/// remain valid for the remainder of the program's life.
+pub struct SystemMemoryProvider {
+    code: Memory,
+    readonly: Memory,
+    writable: Memory,
+}
+
+impl SystemMemoryProvider {
+    /// Create a new memory provider backed by the system allocator.
+    pub fn new() -> Self {
+        Self {
+            code: Memory::new(),
+            readonly: Memory::new(),
+            writable: Memory::new(),
+        }
+    }
+}
+
+impl JITMemoryProvider for SystemMemoryProvider {
+    unsafe fn free_memory(&mut self) {
+        self.code.free_memory();
+        self.readonly.free_memory();
+        self.writable.free_memory();
+    }
+
+    fn finalize(&mut self, branch_protection: BranchProtection) -> ModuleResult<()> {
+        self.readonly.set_readonly()?;
+        self.code.set_readable_and_executable(branch_protection)
+    }
+
+    fn allocate_readexec(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
+        self.code.allocate(size, align)
+    }
+
+    fn allocate_readwrite(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
+        self.writable.allocate(size, align)
+    }
+
+    fn allocate_readonly(&mut self, size: usize, align: u64) -> io::Result<*mut u8> {
+        self.readonly.allocate(size, align)
+    }
+}
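Closing annotation (not part of the patch): returning to the arena provider from memory/arena.rs above, the segment lifecycle (reserve, allocate while read-write, flip to target protection, free) can also be exercised standalone. A hedged sketch using only the public API introduced by this patch:

```rust
use cranelift_jit::{ArenaMemoryProvider, BranchProtection, JITMemoryProvider};

fn main() {
    // Reserve 1 MiB of address space; pages start out PROT_NONE and are
    // flipped to read-write only as segments are handed out.
    let mut arena = ArenaMemoryProvider::new_with_size(1 << 20).expect("reservation failed");

    // Carve out a read-only slot; its segment is still read-write at this
    // point, so we can initialize it in place.
    let ptr = arena.allocate_readonly(4, 4).expect("allocation failed");
    unsafe { ptr.cast::<u32>().write(0xdead_beef) };

    // `finalize` applies each segment's target protection (READ here) and
    // performs the pipeline flush required for executable segments.
    arena.finalize(BranchProtection::None).expect("finalize failed");
    assert_eq!(unsafe { ptr.cast::<u32>().read() }, 0xdead_beef);

    // Invalidates `ptr`; sound only because nothing dereferences it afterwards.
    unsafe { arena.free_memory() };
}
```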