From e5830ef653c14ec50c883ecce0793d3caddfc865 Mon Sep 17 00:00:00 2001 From: Mat Hostetter Date: Sat, 10 Oct 2020 17:47:48 -0700 Subject: [PATCH] Optimize `Entities::reserve_entity` to reduce memory and atomics TL;DR: this cuts in half the number of atomic ops per `Entities::reserve_entity` and also cuts in half the memory overhead related to Entity freelisting and allocation. Entities maintained two tables: `reserved` and `free`, each with one slot for every possible Entity. This way every entity could be freelisted or reserved without needing more memory. Combined, these tables took 2*meta.len() slots. The key observation is that any entity can only be in one table or the other, but not both, so we actually only need meta.len() slots total, i.e. half the memory. One way to take advantage of this would be to have the freelist grow "up" from the bottom of a Vec, and reserved grow "down" from the top of the same Vec, because we know they can't meet in the middle. But there is a better way. This change combines `free` and `reserved` into `unused`, cutting the memory in half, and also lays them out like this: ``` ------------------------------------------ | freelist | reserved | | ------------------------------------------ ^ ^ free_cursor reserved_cursor ``` When `reserve_entity` wants to move something from the freelist to the reserved list, it simply atomically decrements `free_cursor`, the boundary between the two, without moving any array entries. This avoids the previous atomic operation to update `reserved`. `reserved_cursor` no longer needs to be atomic because it is only modified by `&mut` methods. This approach does make `alloc()` and `free()` slightly more complicated. `alloc()` has to update `reserved_cursor` to match `free_cursor` (just a single store). This works if you can't have unflushed data when you alloc, which is my understanding. 
If `free` tries to push an item and there are reserved values, it needs to move the first reserved value to the end of the reserved range to make room for the larger freelist. I don't know if this can happen -- is it actually legal to free into an unflushed Entities? If not, then we don't need some of this code. This change also has a few other tweaks: - When grow() copies values into the freelist, put them "before" the old freelist values (if any), so they get popped last. This way we are more likely to hand out lower IDs, only allocating higher IDs when we really need them. - Added some unit tests. I wasn't completely clear on the `reserved_len` (etc.) code and its requirements (what is legal while iterating with those?), so I took my best guess how to update it. --- crates/bevy_ecs/hecs/src/entities.rs | 236 ++++++++++++++++++++------- 1 file changed, 178 insertions(+), 58 deletions(-) diff --git a/crates/bevy_ecs/hecs/src/entities.rs b/crates/bevy_ecs/hecs/src/entities.rs index 929a2c8c60092..9f818d1203b3e 100644 --- a/crates/bevy_ecs/hecs/src/entities.rs +++ b/crates/bevy_ecs/hecs/src/entities.rs @@ -1,4 +1,4 @@ -use alloc::{boxed::Box, vec::Vec}; +use alloc::vec::Vec; use core::{ convert::TryFrom, fmt, mem, @@ -61,17 +61,28 @@ impl fmt::Debug for Entity { #[derive(Debug, Default)] pub(crate) struct Entities { pub meta: Vec, + // Reserved entities outside the range of `meta`, having implicit generation 0, archetype 0, and // undefined index. Calling `flush` converts these to real entities, which can have a fully // defined location. pending: AtomicU32, - // Unused entity IDs below `meta.len()` - free: Vec, + + // Unused entity IDs below `meta.len()`, containing a freelist followed by the reserved IDs. + // By decrementing `free_cursor`, we move an ID from the freelist to reserved without + // actually modifying the array. + // + // This is the same size as meta, so we can freelist every Entity in existence. 
+ unused: Vec, + + // `unused[0..free_cursor]` are freelisted IDs, all below `meta.len()`. free_cursor: AtomicU32, - // Reserved IDs within `meta.len()` with implicit archetype 0 and undefined index. Should be - // consumed and used to initialize locations to produce real entities after calling `flush`. - reserved: Box<[AtomicU32]>, - reserved_cursor: AtomicU32, + + // `unused[free_cursor..reserved_cursor]` are reserved IDs, all below `meta.len()`. + // They need to be consumed and used to initialize locations to produce real entities after + // calling `flush`. + // + // The invariant is `reserved_cursor >= free_cursor`. + reserved_cursor: u32, } impl Entities { @@ -96,8 +107,9 @@ impl Entities { .expect("too many entities"), }; } - // The freelist has entities in it, so move the last entry to the reserved list, to - // be consumed by the caller as part of a higher-level flush. + // The freelist has entities in it, so shift over the boundary between the + // freelisted values and the reserved values by one. Reserved values will be + // consumed by the caller as part of a higher-level flush. Some(next) => { // We don't care about memory ordering here so long as we get our slot. if self @@ -108,9 +120,7 @@ impl Entities { // Another thread already consumed this slot, start over. 
continue; } - let id = self.free[next as usize]; - let reservation = self.reserved_cursor.fetch_add(1, Ordering::Relaxed); - self.reserved[reservation as usize].store(id, Ordering::Relaxed); + let id = self.unused[next as usize]; return Entity { generation: self.meta[id as usize].generation, id, @@ -129,26 +139,32 @@ impl Entities { 0, "allocator must be flushed before potentially growing" ); - let index = self.free_cursor.load(Ordering::Relaxed); - match index.checked_sub(1) { + + let free_cursor = self.free_cursor.load(Ordering::Relaxed); + + debug_assert_eq!( + self.reserved_cursor, free_cursor, + "allocator must be flushed before potentially growing" + ); + + let new_free_cursor = match free_cursor.checked_sub(1) { None => { self.grow(0); - let cursor = self.free_cursor.fetch_sub(1, Ordering::Relaxed); - let id = self.free[(cursor - 1) as usize]; - Entity { - generation: self.meta[id as usize].generation, - id, - } - } - Some(next) => { - // Not racey due to &mut self - self.free_cursor.store(next, Ordering::Relaxed); - let id = self.free[next as usize]; - Entity { - generation: self.meta[id as usize].generation, - id, - } + self.free_cursor.load(Ordering::Relaxed) - 1 // Not racey due to &mut self } + Some(next) => next, + }; + + let id = self.unused[new_free_cursor as usize]; + + self.free_cursor.store(new_free_cursor, Ordering::Relaxed); // Not racey due to &mut self + + // Nothing is reserved, because we flushed, so reserved_cursor == free_cursor. 
+ self.reserved_cursor = new_free_cursor; + + Entity { + generation: self.meta[id as usize].generation, + id, } } @@ -169,8 +185,22 @@ impl Entities { index: usize::max_value(), }, ); + let index = self.free_cursor.fetch_add(1, Ordering::Relaxed); // Not racey due to &mut self - self.free[index as usize] = entity.id; + + let reserved_cursor = self.reserved_cursor; + self.reserved_cursor = reserved_cursor + 1; + if reserved_cursor > index { + // We need to slide up the reserved range, moving the first reserved ID + // to the end, to make room. + // + // QUESTION: I think this can only happen if we can free IDs while unflushed + // stuff exists, is that legal? There's no assert preventing it. If it's + // illegal we can delete this, because there can't be reserved things here. + self.unused[reserved_cursor as usize] = self.unused[index as usize]; + } + + self.unused[index as usize] = entity.id; debug_assert!( loc.index != usize::max_value(), "free called on reserved entity without flush" @@ -178,7 +208,7 @@ impl Entities { Ok(loc) } - /// Ensure `n` at least allocations can succeed without reallocating + /// Ensure at least `n` allocations can succeed without reallocating pub fn reserve(&mut self, additional: u32) { debug_assert_eq!( self.pending.load(Ordering::Relaxed), @@ -200,13 +230,13 @@ impl Entities { pub fn clear(&mut self) { // Not racey due to &mut self - self.free_cursor - .store(self.meta.len() as u32, Ordering::Relaxed); - for (i, x) in self.free.iter_mut().enumerate() { + let end = self.unused.len() as u32; + self.free_cursor.store(end, Ordering::Relaxed); + self.reserved_cursor = end; + for (i, x) in self.unused.iter_mut().enumerate() { *x = i as u32; } self.pending.store(0, Ordering::Relaxed); - self.reserved_cursor.store(0, Ordering::Relaxed); } /// Access the location storage of an entity @@ -258,17 +288,19 @@ impl Entities { // The following three methods allow iteration over `reserved` simultaneous to location // writes. 
This is a lazy hack, but we only use it in `World::flush` so the complexity and unsafety // involved in producing an `impl Iterator` isn't a clear win. - pub fn reserved_len(&self) -> u32 { - self.reserved_cursor.load(Ordering::Relaxed) + pub fn reserved_len(&mut self) -> u32 { + self.reserved_cursor - self.free_cursor.load(Ordering::Relaxed) } - pub fn reserved(&self, i: u32) -> u32 { + pub fn reserved(&mut self, i: u32) -> u32 { debug_assert!(i < self.reserved_len()); - self.reserved[i as usize].load(Ordering::Relaxed) + let free_cursor = self.free_cursor.load(Ordering::Relaxed) as usize; + self.unused[free_cursor + i as usize] } pub fn clear_reserved(&mut self) { - self.reserved_cursor.store(0, Ordering::Relaxed); + let free_cursor = self.free_cursor.load(Ordering::Relaxed); // Not racey due to &mut self + self.reserved_cursor = free_cursor; } /// Expand storage and mark all but the first `pending` of the new slots as free @@ -291,27 +323,31 @@ impl Entities { ); let free_cursor = self.free_cursor.load(Ordering::Relaxed); // Not racey due to &mut self - let mut new_free = Vec::with_capacity(new_len); - new_free.extend_from_slice(&self.free[0..free_cursor as usize]); - // Add freshly allocated trailing free slots - new_free.extend(((self.meta.len() as u32 + pending)..new_len as u32).rev()); - debug_assert!(new_free.len() <= new_len); - self.free_cursor - .store(new_free.len() as u32, Ordering::Relaxed); // Not racey due to &mut self + let reserved_cursor = self.reserved_cursor; // Not racey due to &mut self + + let mut new_unused = Vec::with_capacity(new_len); + + // Add freshly allocated trailing free slots. List them first since they are + // higher-numbered than any existing freelist entries, meaning we'll pop them last. + new_unused.extend(((self.meta.len() as u32 + pending)..new_len as u32).rev()); - // Zero-fill - new_free.resize(new_len, 0); + // Insert the original freelist, if any. 
+ new_unused.extend_from_slice(&self.unused[0..free_cursor as usize]); + + let new_free_cursor = new_unused.len() as u32; + self.free_cursor.store(new_free_cursor, Ordering::Relaxed); // Not racey due to &mut self + + // Preserve any reserved values. + new_unused.extend_from_slice(&self.unused[free_cursor as usize..reserved_cursor as usize]); + self.reserved_cursor = new_unused.len() as u32; + + debug_assert!(new_unused.len() <= new_len); + + // Zero-fill. This gives us enough room to freelist every ID in existence. + new_unused.resize(new_len, 0); self.meta = new_meta; - self.free = new_free; - let mut new_reserved = Vec::with_capacity(new_len); - // Not racey due to &mut self - let reserved_cursor = self.reserved_cursor.load(Ordering::Relaxed); - for x in &self.reserved[..reserved_cursor as usize] { - new_reserved.push(AtomicU32::new(x.load(Ordering::Relaxed))); - } - new_reserved.resize_with(new_len, || AtomicU32::new(0)); - self.reserved = new_reserved.into(); + self.unused = new_unused; } pub fn get_reserver(&self) -> EntityReserver { @@ -366,6 +402,8 @@ impl Error for NoSuchEntity {} #[cfg(test)] mod tests { use super::*; + use rand::Rng; + use std::collections::{HashMap, HashSet}; #[test] fn entity_bits_roundtrip() { @@ -375,4 +413,86 @@ mod tests { }; assert_eq!(Entity::from_bits(e.to_bits()), e); } + + #[test] + fn alloc_and_free() { + let mut rng = rand::thread_rng(); + + let mut e = Entities::default(); + let mut first_unused = 0u32; + let mut id_to_gen: HashMap = Default::default(); + let mut free_set: HashSet = Default::default(); + + for _ in 0..100000 { + let alloc = rng.gen_range(0, 3) != 0; + if alloc || first_unused == 0 { + let entity = e.alloc(); + + let id = entity.id; + if !free_set.is_empty() { + // This should have come from the freelist. 
+ assert!(free_set.remove(&id)); + } else if id >= first_unused { + first_unused = id + 1; + } + + e.get_mut(entity).unwrap().index = 37; + + assert!(id_to_gen.insert(id, entity.generation).is_none()); + } else { + // Free a random ID, whether or not it's in use, and check for errors. + let id = rng.gen_range(0, first_unused); + + let generation = id_to_gen.remove(&id); + let entity = Entity { + id, + generation: generation.unwrap_or(0), + }; + + assert_eq!(e.free(entity).is_ok(), generation.is_some()); + + free_set.insert(id); + } + } + } + + #[test] + fn reserve_entity() { + let mut e = Entities::default(); + + // Allocate and ignore a bunch of items to mostly drain the initial freelist. + e.grow(0); + let skip = e.free_cursor.load(Ordering::Relaxed) - 10; + let _v0: Vec = (0..skip).map(|_| e.alloc()).collect(); + + // Allocate 10 items. + let mut v1: Vec = (0..10).map(|_| e.alloc()).collect(); + assert_eq!(v1.iter().map(|e| e.id).max(), Some(1023)); + for &entity in v1.iter() { + e.get_mut(entity).unwrap().index = 37; + } + + // Put the last 4 on the freelist. + for entity in v1.drain(6..) { + e.free(entity).unwrap(); + } + assert_eq!(e.free_cursor.load(Ordering::Relaxed), 4); + + // Allocate 10 entities, so 4 will come from the freelist. + // This means we will have allocated 10 + 10 - 4 total items, so max id is 15. + let v2: Vec = (0..10).map(|_| e.reserve_entity()).collect(); + assert_eq!(v2.iter().map(|e| e.id).max(), Some(skip + 15)); + + // We should have exactly IDs skip..skip+16. + let mut v3: Vec = v1.iter().chain(v2.iter()).copied().collect(); + assert_eq!(v3.len(), 16); + v3.sort_by_key(|entity| entity.id); + for (i, entity) in v3.into_iter().enumerate() { + assert_eq!(entity.id, skip + i as u32); + } + + // 6 will come from pending. + assert_eq!(e.pending.load(Ordering::Relaxed), 6); + assert_eq!(e.flush().count(), 6); + } }