From 257abad4979b6ea4a9e4eaae07dfeecbe8192192 Mon Sep 17 00:00:00 2001 From: David Frank Date: Mon, 2 Dec 2024 22:17:49 +0000 Subject: [PATCH 1/3] Optimize buckets --- canbench_results.yml | 48 ++++++------ src/memory_manager.rs | 178 ++++++++++++++++++++++++++---------------- 2 files changed, 133 insertions(+), 93 deletions(-) diff --git a/canbench_results.yml b/canbench_results.yml index 577e4c46..7695c903 100644 --- a/canbench_results.yml +++ b/canbench_results.yml @@ -73,10 +73,10 @@ benches: scopes: {} btreemap_get_blob_512_1024_v2_mem_manager: total: - instructions: 2755579551 + instructions: 2621506893 heap_increase: 0 stable_memory_increase: 0 - scopes: { } + scopes: {} btreemap_get_blob_64_1024: total: instructions: 572513751 @@ -103,13 +103,13 @@ benches: scopes: {} btreemap_get_blob_8_u64: total: - instructions: 196934591 + instructions: 196964591 heap_increase: 0 stable_memory_increase: 0 scopes: {} btreemap_get_blob_8_u64_v2: total: - instructions: 291577086 + instructions: 291607086 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -127,19 +127,19 @@ benches: scopes: {} btreemap_get_u64_u64: total: - instructions: 177269159 + instructions: 177299159 heap_increase: 0 stable_memory_increase: 0 scopes: {} btreemap_get_u64_u64_v2: total: - instructions: 255485892 + instructions: 255515892 heap_increase: 0 stable_memory_increase: 0 - scopes: { } + scopes: {} btreemap_get_u64_u64_v2_mem_manager: total: - instructions: 522662446 + instructions: 419339926 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -220,10 +220,10 @@ benches: instructions: 5070548871 heap_increase: 0 stable_memory_increase: 261 - scopes: { } + scopes: {} btreemap_insert_blob_1024_512_v2_mem_manager: total: - instructions: 5594888576 + instructions: 5399122036 heap_increase: 0 stable_memory_increase: 256 scopes: {} @@ -349,13 +349,13 @@ benches: scopes: {} btreemap_insert_blob_8_u64: total: - instructions: 324571604 + instructions: 324578222 heap_increase: 0 stable_memory_increase: 6 scopes: {} btreemap_insert_blob_8_u64_v2: total: - instructions: 429525034 + instructions: 429531652 heap_increase: 0 stable_memory_increase: 4 scopes: {} @@ -379,10 +379,10 @@ benches: scopes: {} btreemap_insert_u64_u64_mem_manager: total: - instructions: 830923183 + instructions: 677264271 heap_increase: 0 stable_memory_increase: 0 - scopes: { } + scopes: {} btreemap_insert_u64_u64_v2: total: instructions: 421501977 @@ -559,13 +559,13 @@ benches: scopes: {} btreemap_remove_blob_8_u64: total: - instructions: 425348358 + instructions: 425371740 heap_increase: 0 stable_memory_increase: 0 scopes: {} btreemap_remove_blob_8_u64_v2: total: - instructions: 566089549 + instructions: 566112931 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -583,13 +583,13 @@ benches: scopes: {} btreemap_remove_u64_u64: total: - instructions: 491364624 + instructions: 491394624 heap_increase: 0 stable_memory_increase: 0 scopes: {} btreemap_remove_u64_u64_v2: total: - instructions: 609098227 + instructions: 609128227 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -631,7 +631,7 @@ benches: scopes: {} memory_manager_overhead: total: - instructions: 1182056386 + instructions: 1182001071 heap_increase: 0 stable_memory_increase: 8320 scopes: {} @@ -661,19 +661,19 @@ benches: scopes: {} vec_get_blob_4_mem_manager: total: - instructions: 12194073 + instructions: 9246773 heap_increase: 0 stable_memory_increase: 0 - scopes: { } + scopes: {} vec_get_blob_64: total: instructions: 12960294 heap_increase: 0 stable_memory_increase: 0 - scopes: { } + scopes: {} vec_get_blob_64_mem_manager: total: - instructions: 20656466 + instructions: 17603274 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -685,7 +685,7 @@ benches: scopes: {} vec_get_u64: total: - instructions: 5390319 + instructions: 5420319 heap_increase: 0 stable_memory_increase: 0 scopes: {} diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 8e0396c7..65bec4f7 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -380,14 +380,14 @@ impl MemoryManagerInner { } let mut bytes_written = 0; - for Segment { address, length } in self.bucket_iter(id, offset, src.len()) { + self.for_each_bucket(id, offset, src.len(), |Segment { address, length }| { self.memory.write( address.get(), &src[bytes_written as usize..(bytes_written + length.get()) as usize], ); bytes_written += length.get(); - } + }); } #[inline] @@ -407,7 +407,7 @@ impl MemoryManagerInner { } let mut bytes_read: usize = 0; - for Segment { address, length } in self.bucket_iter(id, offset, count) { + self.for_each_bucket(id, offset, count, |Segment { address, length }| { let length = length.get().try_into().expect("Length overflows usize"); self.memory .read_unsafe(address.get(), dst.add(bytes_read), length); @@ -415,22 +415,74 @@ impl MemoryManagerInner { bytes_read = bytes_read .checked_add(length) .expect("Bytes read overflowed usize"); - } + }) } /// Initializes a [`BucketIterator`]. - fn bucket_iter(&self, MemoryId(id): MemoryId, offset: u64, length: usize) -> BucketIterator { + fn for_each_bucket( + &self, + MemoryId(id): MemoryId, + offset: u64, + length: usize, + mut func: impl FnMut(Segment), + ) { + if length == 0 { + return; + } + + // TODO: check length // Get the buckets allocated to the given memory id. - let buckets = self.memory_buckets[id as usize].as_slice(); - - BucketIterator { - virtual_segment: Segment { - address: Address::from(offset), - length: Bytes::from(length as u64), - }, - buckets, - bucket_size_in_bytes: self.bucket_size_in_bytes(), + let mut buckets = self.memory_buckets[id as usize].as_slice(); + let mut virtual_segment = Segment { + address: Address::from(offset), + length: Bytes::from(length as u64), + }; + + let bucket_size_in_bytes = self.bucket_size_in_bytes().get(); + // starting bucket index + let mut bucket_idx = (offset / bucket_size_in_bytes) as usize; + let start_offset = offset % bucket_size_in_bytes; + let remaining_bytes = bucket_size_in_bytes - start_offset; + + let bucket_address = + self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); + + let first_segment_len = remaining_bytes.min(length as u64); + func(Segment { + address: bucket_address + start_offset.into(), + length: first_segment_len.into(), + }); + + virtual_segment.length -= first_segment_len.into(); + virtual_segment.address += first_segment_len.into(); + + if virtual_segment.length.get() == 0 { + return; } + + for _ in 0..(virtual_segment.length.get() / self.bucket_size_in_bytes().get()) { + bucket_idx += 1; + let bucket_address = + self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); + func(Segment { + address: bucket_address, + length: self.bucket_size_in_bytes(), + }); + } + + if virtual_segment.length.get() == 0 { + return; + } + + bucket_idx += 1; + + let bucket_address = + self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); + + func(Segment { + address: bucket_address, + length: virtual_segment.length % self.bucket_size_in_bytes(), + }); } fn bucket_size_in_bytes(&self) -> Bytes { @@ -447,6 +499,11 @@ impl MemoryManagerInner { pub fn into_memory(self) -> M { self.memory } + + #[inline] + fn bucket_address(&self, id: &BucketId) -> Address { + Address::from(BUCKETS_OFFSET_IN_BYTES) + self.bucket_size_in_bytes() * Bytes::from(id.0) + } } struct Segment { @@ -477,61 +534,44 @@ struct Segment { // // Each of the segments above can then be translated into the real address space by looking up // the underlying buckets' addresses in real memory. -struct BucketIterator<'a> { - virtual_segment: Segment, - buckets: &'a [BucketId], - bucket_size_in_bytes: Bytes, -} - -impl Iterator for BucketIterator<'_> { - type Item = Segment; - - fn next(&mut self) -> Option { - if self.virtual_segment.length == Bytes::from(0u64) { - return None; - } - - // Map the virtual segment's address to a real address. - let bucket_idx = - (self.virtual_segment.address.get() / self.bucket_size_in_bytes.get()) as usize; - let bucket_address = self.bucket_address( - *self - .buckets - .get(bucket_idx) - .expect("bucket idx out of bounds"), - ); - - let real_address = bucket_address - + Bytes::from(self.virtual_segment.address.get() % self.bucket_size_in_bytes.get()); - - // Compute how many bytes are in this real segment. - let bytes_in_segment = { - let next_bucket_address = bucket_address + self.bucket_size_in_bytes; - - // Write up to either the end of the bucket, or the end of the segment. - min( - Bytes::from(next_bucket_address.get() - real_address.get()), - self.virtual_segment.length, - ) - }; - - // Update the virtual segment to exclude the portion we're about to return. - self.virtual_segment.length -= bytes_in_segment; - self.virtual_segment.address += bytes_in_segment; - - Some(Segment { - address: real_address, - length: bytes_in_segment, - }) - } -} - -impl<'a> BucketIterator<'a> { - /// Returns the address of a given bucket. - fn bucket_address(&self, id: BucketId) -> Address { - Address::from(BUCKETS_OFFSET_IN_BYTES) + self.bucket_size_in_bytes * Bytes::from(id.0) - } -} +// struct BucketIterator<'a> { +// virtual_segment: Segment, +// buckets: &'a [BucketId], +// remaining_space_in_bucket: Bytes, +// bucket_size_in_bytes: Bytes, +// } +// +// impl Iterator for BucketIterator<'_> { +// type Item = Segment; +// +// fn next(&mut self) -> Option { +// if self.virtual_segment.length == Bytes::from(0u64) { +// return None; +// } +// +// // Map the virtual segment's address to a real address. +// let bucket_address = +// self.bucket_address(*self.buckets.get(0).expect("bucket idx out of bounds")); +// +// let real_address = bucket_address +// + Bytes::from(self.virtual_segment.address.get() % self.bucket_size_in_bytes.get()); +// +// // Compute how many bytes are in this real segment. +// // Write up to either the remaining space in current bucket, or the end of the segment. +// let bytes_in_segment = min(self.remaining_space_in_bucket, self.virtual_segment.length); +// +// // Update the virtual segment to exclude the portion we're about to return. +// self.virtual_segment.length -= bytes_in_segment; +// self.virtual_segment.address += bytes_in_segment; +// self.buckets = &self.buckets[1..]; +// self.remaining_space_in_bucket = self.bucket_size_in_bytes; +// +// Some(Segment { +// address: real_address, +// length: bytes_in_segment, +// }) +// } +// } #[derive(Clone, Copy, Ord, Eq, PartialEq, PartialOrd, Debug)] pub struct MemoryId(u8); From ed40ff994dc22bd6cea7e46bc9b98b415471a58d Mon Sep 17 00:00:00 2001 From: David Frank Date: Tue, 3 Dec 2024 14:04:37 +0000 Subject: [PATCH 2/3] Clean up code --- canbench_results.yml | 14 +-- src/memory_manager.rs | 223 ++++++++++++++++-------------------------- 2 files changed, 94 insertions(+), 143 deletions(-) diff --git a/canbench_results.yml b/canbench_results.yml index 7695c903..1ed74729 100644 --- a/canbench_results.yml +++ b/canbench_results.yml @@ -73,7 +73,7 @@ benches: scopes: {} btreemap_get_blob_512_1024_v2_mem_manager: total: - instructions: 2621506893 + instructions: 2624136090 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -139,7 +139,7 @@ benches: scopes: {} btreemap_get_u64_u64_v2_mem_manager: total: - instructions: 419339926 + instructions: 421366446 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -223,7 +223,7 @@ benches: scopes: {} btreemap_insert_blob_1024_512_v2_mem_manager: total: - instructions: 5399122036 + instructions: 5402984503 heap_increase: 0 stable_memory_increase: 256 scopes: {} @@ -379,7 +379,7 @@ benches: scopes: {} btreemap_insert_u64_u64_mem_manager: total: - instructions: 677264271 + instructions: 680292499 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -631,7 +631,7 @@ benches: scopes: {} memory_manager_overhead: total: - instructions: 1182001071 + instructions: 1182002161 heap_increase: 0 stable_memory_increase: 8320 scopes: {} @@ -661,7 +661,7 @@ benches: scopes: {} vec_get_blob_4_mem_manager: total: - instructions: 9246773 + instructions: 9333723 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -673,7 +673,7 @@ benches: scopes: {} vec_get_blob_64_mem_manager: total: - instructions: 17603274 + instructions: 17664902 heap_increase: 0 stable_memory_increase: 0 scopes: {} diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 65bec4f7..6e045719 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -46,7 +46,6 @@ use crate::{ write, write_struct, Memory, WASM_PAGE_SIZE, }; use std::cell::RefCell; -use std::cmp::min; use std::rc::Rc; const MAGIC: &[u8; 3] = b"MGR"; @@ -380,14 +379,21 @@ impl MemoryManagerInner { } let mut bytes_written = 0; - self.for_each_bucket(id, offset, src.len(), |Segment { address, length }| { - self.memory.write( - address.get(), - &src[bytes_written as usize..(bytes_written + length.get()) as usize], - ); - - bytes_written += length.get(); - }); + self.for_each_bucket( + id, + VirtualSegment { + address: offset.into(), + length: src.len().into(), + }, + |RealSegment { address, length }| { + self.memory.write( + address.get(), + &src[bytes_written as usize..(bytes_written + length.get()) as usize], + ); + + bytes_written += length.get(); + }, + ); } #[inline] @@ -406,83 +412,85 @@ impl MemoryManagerInner { panic!("{id:?}: read out of bounds"); } - let mut bytes_read: usize = 0; - self.for_each_bucket(id, offset, count, |Segment { address, length }| { - let length = length.get().try_into().expect("Length overflows usize"); - self.memory - .read_unsafe(address.get(), dst.add(bytes_read), length); - - bytes_read = bytes_read - .checked_add(length) - .expect("Bytes read overflowed usize"); - }) - } - - /// Initializes a [`BucketIterator`]. + let mut bytes_read = Bytes::new(0); + self.for_each_bucket( + id, + VirtualSegment { + address: offset.into(), + length: count.into(), + }, + |RealSegment { address, length }| { + self.memory.read_unsafe( + address.get(), + // The cast to usize is safe because `bytes_read` and `length` are bounded by + // usize `count`. + dst.add(bytes_read.get() as usize), + length.get() as usize, + ); + + bytes_read += length; + }, + ) + } + + /// Maps a segment of virtual memory to segments of real memory. + /// + /// `func` is invoked with real memory segments of real memory that `virtual_segment` is mapped + /// to. + /// + /// A segment in virtual memory can map to multiple segments of real memory. Here's an example: + /// + /// Virtual Memory + /// ```text + /// -------------------------------------------------------- + /// (A) ---------- SEGMENT ---------- (B) + /// -------------------------------------------------------- + /// ↑ ↑ ↑ ↑ + /// Bucket 0 Bucket 1 Bucket 2 Bucket 3 + /// ``` + /// + /// The [`VirtualMemory`] is internally divided into fixed-size buckets. In the memory's virtual + /// address space, all these buckets are consecutive, but in real memory this may not be the case. + /// + /// A virtual segment would first be split at the bucket boundaries. The example virtual segment + /// above would be split into the following segments: + /// + /// (A, end of bucket 0) + /// (start of bucket 1, end of bucket 1) + /// (start of bucket 2, B) + /// + /// Each of the segments above can then be translated into the real address space by looking up + /// the underlying buckets' addresses in real memory. fn for_each_bucket( &self, MemoryId(id): MemoryId, - offset: u64, - length: usize, - mut func: impl FnMut(Segment), + virtual_segment: VirtualSegment, + mut func: impl FnMut(RealSegment), ) { - if length == 0 { - return; - } - - // TODO: check length + let virtual_offset = virtual_segment.address.get(); + let mut length = virtual_segment.length.get(); // Get the buckets allocated to the given memory id. - let mut buckets = self.memory_buckets[id as usize].as_slice(); - let mut virtual_segment = Segment { - address: Address::from(offset), - length: Bytes::from(length as u64), - }; - + let buckets = self.memory_buckets[id as usize].as_slice(); let bucket_size_in_bytes = self.bucket_size_in_bytes().get(); - // starting bucket index - let mut bucket_idx = (offset / bucket_size_in_bytes) as usize; - let start_offset = offset % bucket_size_in_bytes; - let remaining_bytes = bucket_size_in_bytes - start_offset; - - let bucket_address = - self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); - - let first_segment_len = remaining_bytes.min(length as u64); - func(Segment { - address: bucket_address + start_offset.into(), - length: first_segment_len.into(), - }); - virtual_segment.length -= first_segment_len.into(); - virtual_segment.address += first_segment_len.into(); - - if virtual_segment.length.get() == 0 { - return; - } - - for _ in 0..(virtual_segment.length.get() / self.bucket_size_in_bytes().get()) { - bucket_idx += 1; + // The start offset where we start reading from in a bucket. In the first iteration the + // value is calculated from `virtual_offset`, in later iterations, it's always 0. + let mut start_offset_in_bucket = virtual_offset % bucket_size_in_bytes; + let mut bucket_idx = (virtual_offset / bucket_size_in_bytes) as usize; + while length > 0 { let bucket_address = self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); - func(Segment { - address: bucket_address, - length: self.bucket_size_in_bytes(), + + let segment_len = (bucket_size_in_bytes - start_offset_in_bucket).min(length); + func(RealSegment { + address: bucket_address + start_offset_in_bucket.into(), + length: segment_len.into(), }); - } - if virtual_segment.length.get() == 0 { - return; + length -= segment_len; + bucket_idx += 1; + start_offset_in_bucket = 0; } - - bucket_idx += 1; - - let bucket_address = - self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); - - func(Segment { - address: bucket_address, - length: virtual_segment.length % self.bucket_size_in_bytes(), - }); } fn bucket_size_in_bytes(&self) -> Bytes { @@ -506,72 +514,15 @@ impl MemoryManagerInner { } } -struct Segment { +struct VirtualSegment { address: Address, length: Bytes, } -// An iterator that maps a segment of virtual memory to segments of real memory. -// -// A segment in virtual memory can map to multiple segments of real memory. Here's an example: -// -// Virtual Memory -// -------------------------------------------------------- -// (A) ---------- SEGMENT ---------- (B) -// -------------------------------------------------------- -// ↑ ↑ ↑ ↑ -// Bucket 0 Bucket 1 Bucket 2 Bucket 3 -// -// The [`VirtualMemory`] is internally divided into fixed-size buckets. In the memory's virtual -// address space, all these buckets are consecutive, but in real memory this may not be the case. -// -// A virtual segment would first be split at the bucket boundaries. The example virtual segment -// above would be split into the following segments: -// -// (A, end of bucket 0) -// (start of bucket 1, end of bucket 1) -// (start of bucket 2, B) -// -// Each of the segments above can then be translated into the real address space by looking up -// the underlying buckets' addresses in real memory. -// struct BucketIterator<'a> { -// virtual_segment: Segment, -// buckets: &'a [BucketId], -// remaining_space_in_bucket: Bytes, -// bucket_size_in_bytes: Bytes, -// } -// -// impl Iterator for BucketIterator<'_> { -// type Item = Segment; -// -// fn next(&mut self) -> Option { -// if self.virtual_segment.length == Bytes::from(0u64) { -// return None; -// } -// -// // Map the virtual segment's address to a real address. -// let bucket_address = -// self.bucket_address(*self.buckets.get(0).expect("bucket idx out of bounds")); -// -// let real_address = bucket_address -// + Bytes::from(self.virtual_segment.address.get() % self.bucket_size_in_bytes.get()); -// -// // Compute how many bytes are in this real segment. -// // Write up to either the remaining space in current bucket, or the end of the segment. -// let bytes_in_segment = min(self.remaining_space_in_bucket, self.virtual_segment.length); -// -// // Update the virtual segment to exclude the portion we're about to return. -// self.virtual_segment.length -= bytes_in_segment; -// self.virtual_segment.address += bytes_in_segment; -// self.buckets = &self.buckets[1..]; -// self.remaining_space_in_bucket = self.bucket_size_in_bytes; -// -// Some(Segment { -// address: real_address, -// length: bytes_in_segment, -// }) -// } -// } +struct RealSegment { + address: Address, + length: Bytes, +} #[derive(Clone, Copy, Ord, Eq, PartialEq, PartialOrd, Debug)] pub struct MemoryId(u8); From 35c90f8ab449c03585da20d415704830b4632327 Mon Sep 17 00:00:00 2001 From: David Frank Date: Wed, 4 Dec 2024 09:40:30 +0000 Subject: [PATCH 3/3] Readability --- src/memory_manager.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 6e045719..092cbdb8 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -467,21 +467,23 @@ impl MemoryManagerInner { virtual_segment: VirtualSegment, mut func: impl FnMut(RealSegment), ) { - let virtual_offset = virtual_segment.address.get(); - let mut length = virtual_segment.length.get(); // Get the buckets allocated to the given memory id. let buckets = self.memory_buckets[id as usize].as_slice(); let bucket_size_in_bytes = self.bucket_size_in_bytes().get(); + let virtual_offset = virtual_segment.address.get(); + + let mut length = virtual_segment.length.get(); + let mut bucket_idx = (virtual_offset / bucket_size_in_bytes) as usize; // The start offset where we start reading from in a bucket. In the first iteration the // value is calculated from `virtual_offset`, in later iterations, it's always 0. let mut start_offset_in_bucket = virtual_offset % bucket_size_in_bytes; - let mut bucket_idx = (virtual_offset / bucket_size_in_bytes) as usize; + while length > 0 { let bucket_address = self.bucket_address(buckets.get(bucket_idx).expect("bucket idx out of bounds")); - let segment_len = (bucket_size_in_bytes - start_offset_in_bucket).min(length); + func(RealSegment { address: bucket_address + start_offset_in_bucket.into(), length: segment_len.into(),