From 5abfad4551e1139ba19ec6b132caac352e878ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=9B=A8=E5=B0=8F=E6=B0=B4?= <792179245@qq.com> Date: Wed, 20 Aug 2025 15:06:40 +0800 Subject: [PATCH 1/8] Add RVV vectorization to Chameleon encoding for performance boost - Implemented `encode_batch` in `Chameleon` (QuadEncoder) using RISC-V Vector (RVV) intrinsics to vectorize hash computation, dictionary gather, and comparison. - Updated `encode_block` in `Codec` trait to use `encode_batch` for aligned u32 quads, with scalar fallback for prefix/suffix. - Added conditional compilation for RVV (`#[cfg(all(target_arch = riscv64, target_feature = v))]`) with scalar fallback for non-RVV environments. Co-authored-by: gong-flying --- src/codec/codec.rs | 52 +++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/codec/codec.rs b/src/codec/codec.rs index 324b55c..e844d6c 100644 --- a/src/codec/codec.rs +++ b/src/codec/codec.rs @@ -39,31 +39,35 @@ pub trait Codec: QuadEncoder + Decoder { let mark = out_buffer.index; signature.init(out_buffer.index); out_buffer.skip(Self::signature_significant_bytes()); - for sub_block in block.chunks(BYTE_SIZE_U128) { - match <&[u8] as TryInto<[u8; BYTE_SIZE_U128]>>::try_into(sub_block) { - Ok(array) => { - let value_u128 = u128::from_le_bytes(array); - self.encode_quad((value_u128 & 0xffffffff) as u32, out_buffer, signature); - self.encode_quad(((value_u128 >> 32) & 0xffffffff) as u32, out_buffer, signature); - self.encode_quad(((value_u128 >> 64) & 0xffffffff) as u32, out_buffer, signature); - self.encode_quad((value_u128 >> 96) as u32, out_buffer, signature); - } - Err(_error) => { - // Less than 16 bytes left - for bytes in sub_block.chunks(BYTE_SIZE_U32) { - match <&[u8] as TryInto<[u8; BYTE_SIZE_U32]>>::try_into(bytes) { - Ok(array) => { - self.encode_quad(u32::from_le_bytes(array), out_buffer, signature); - } - Err(_error) => { - // Implicit signature plain flag (0x0) - 
out_buffer.push(bytes); - } - } - } - } + + // 安全对齐block到u32,用于批处理 + let (prefix, u32_block, suffix) = unsafe { block.align_to::<u32>() }; + + // 处理不对齐前缀(<4字节,稀有;使用标量或直接push) + for bytes in prefix.chunks(BYTE_SIZE_U32) { + if bytes.len() == BYTE_SIZE_U32 { + let quad = u32::from_le_bytes(bytes.try_into().unwrap()); + self.encode_quad(quad, out_buffer, signature); + } else { + // 隐式plain flag + out_buffer.push(bytes); } } + + // 批处理对齐的u32 quads(主要热点路径) + self.encode_batch(u32_block, out_buffer, signature); + + // 处理后缀(<4字节剩余) + for bytes in suffix.chunks(BYTE_SIZE_U32) { + if bytes.len() == BYTE_SIZE_U32 { + let quad = u32::from_le_bytes(bytes.try_into().unwrap()); + self.encode_quad(quad, out_buffer, signature); + } else { + // 隐式plain flag + out_buffer.push(bytes); + } + } + Self::write_signature(out_buffer, signature); protection_state.update(out_buffer.index - mark >= Self::block_size()); } @@ -124,4 +128,4 @@ pub trait Codec: QuadEncoder + Decoder { Ok(out_buffer.index) } -} \ No newline at end of file +} From e945771759bf5178d0fa6195cb6118dcfd34e445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=9B=A8=E5=B0=8F=E6=B0=B4?= <792179245@qq.com> Date: Wed, 20 Aug 2025 15:55:15 +0800 Subject: [PATCH 2/8] fix error Co-authored-by: gong-flying --- src/algorithms/chameleon/chameleon.rs | 75 ++++++++++++++++++++++++--- src/codec/quad_encoder.rs | 8 ++- 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/src/algorithms/chameleon/chameleon.rs b/src/algorithms/chameleon/chameleon.rs index 21db8fb..7fef6d3 100644 --- a/src/algorithms/chameleon/chameleon.rs +++ b/src/algorithms/chameleon/chameleon.rs @@ -1,3 +1,5 @@ +// File: src/chameleon.rs + use crate::algorithms::PLAIN_FLAG; use crate::codec::codec::Codec; use crate::codec::decoder::Decoder; @@ -9,6 +11,7 @@ use crate::io::read_signature::ReadSignature; use crate::io::write_buffer::WriteBuffer; use crate::io::write_signature::WriteSignature; use crate::{BIT_SIZE_U16, BIT_SIZE_U32, BYTE_SIZE_U32}; +use 
std::arch::riscv64::*; pub(crate) const CHAMELEON_HASH_BITS: usize = BIT_SIZE_U16; pub(crate) const CHAMELEON_HASH_MULTIPLIER: u32 = 0x9D6EF916; @@ -82,16 +85,74 @@ impl QuadEncoder for Chameleon { out_buffer.push(&hash_u16.to_le_bytes()); } } + + #[inline(always)] + fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) { + #[cfg(not(all(target_arch = "riscv64", target_feature = "v")))] + { + for &quad in quads { + self.encode_quad(quad, out_buffer, signature); + } + return; + } + + #[cfg(all(target_arch = "riscv64", target_feature = "v"))] + unsafe { + let num_quads = quads.len(); + let mut offset = 0; + while offset < num_quads { + let remaining = num_quads - offset; + let vl = vsetvli(remaining, riscv64::riscv_v_sew::E32, riscv64::riscv_v_lmul::M1, riscv64::riscv_v_ta::TA, riscv64::riscv_v_ma::MA); + + let v_quad = vle32_v_u32m1(quads.as_ptr().add(offset) as *const u32, vl); + + let v_mult = vmul_vx_u32m1(v_quad, CHAMELEON_HASH_MULTIPLIER, vl); + let v_hash = vsrl_vx_u32m1(v_mult, BIT_SIZE_U32 - CHAMELEON_HASH_BITS as u32, vl); + + let dict_ptr = self.state.chunk_map.as_mut_ptr(); + let v_dict = vluxei32_v_u32m1(dict_ptr as *const u32, v_hash, vl); + + let v_mask = vmseq_vv_m_b32(v_dict, v_quad, vl); // hit mask (true if match) + + let mut quad_arr = vec![0u32; vl]; + let mut hash_arr = vec![0u32; vl]; + let mut hit_arr = vec![false; vl]; + + vse32_v_u32m1(quad_arr.as_mut_ptr(), v_quad, vl); + vse32_v_u32m1(hash_arr.as_mut_ptr(), v_hash, vl); + + for i in 0..vl { + let single_mask = vslidedown_vx_m_b32(v_mask, i as u32, vl); + hit_arr[i] = vfirst_m_b32(single_mask, 1) != -1; // 如果位 set,则 hit + } + + for i in 0..vl { + let quad = quad_arr[i]; + let hash_u16 = hash_arr[i] as u16; + if hit_arr[i] { + signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS); + out_buffer.push(&hash_u16.to_le_bytes()); + } else { + signature.push_bits(PLAIN_FLAG, FLAG_SIZE_BITS); + out_buffer.push(&quad.to_le_bytes()); + 
self.state.chunk_map[hash_u16 as usize] = quad; + } + } + + offset += vl; + } + } + } } impl Decoder for Chameleon { #[inline(always)] fn decode_unit(&mut self, in_buffer: &mut ReadBuffer, signature: &mut ReadSignature, out_buffer: &mut WriteBuffer) { let (quad_a, quad_b) = match signature.read_bits(DECODE_TWIN_FLAG_MASK, DECODE_TWIN_FLAG_MASK_BITS) { - PLAIN_PLAIN_FLAGS => { (self.decode_plain(in_buffer), self.decode_plain(in_buffer)) } - MAP_PLAIN_FLAGS => { (self.decode_map(in_buffer), self.decode_plain(in_buffer)) } - PLAIN_MAP_FLAGS => { (self.decode_plain(in_buffer), self.decode_map(in_buffer)) } - _ => { (self.decode_map(in_buffer), self.decode_map(in_buffer)) } + PLAIN_PLAIN_FLAGS => (self.decode_plain(in_buffer), self.decode_plain(in_buffer)), + MAP_PLAIN_FLAGS => (self.decode_map(in_buffer), self.decode_plain(in_buffer)), + PLAIN_MAP_FLAGS => (self.decode_plain(in_buffer), self.decode_map(in_buffer)), + _ => (self.decode_map(in_buffer), self.decode_map(in_buffer)), }; out_buffer.push(&quad_a.to_le_bytes()); out_buffer.push(&quad_b.to_le_bytes()); @@ -103,15 +164,15 @@ impl Decoder for Chameleon { let quad = match signature.read_bits(DECODE_FLAG_MASK, DECODE_FLAG_MASK_BITS) { PLAIN_FLAG => { match in_buffer.remaining() { - 0 => { return true; } + 0 => return true, 1..=3 => { out_buffer.push(in_buffer.read(in_buffer.remaining())); return true; } - _ => { self.decode_plain(in_buffer) } + _ => self.decode_plain(in_buffer), } } - _ => { self.decode_map(in_buffer) } + _ => self.decode_map(in_buffer), }; out_buffer.push(&quad.to_le_bytes()); } diff --git a/src/codec/quad_encoder.rs b/src/codec/quad_encoder.rs index 8960508..45d89f2 100644 --- a/src/codec/quad_encoder.rs +++ b/src/codec/quad_encoder.rs @@ -1,6 +1,12 @@ +// File: src/codec/quad_encoder.rs + use crate::io::write_buffer::WriteBuffer; use crate::io::write_signature::WriteSignature; pub trait QuadEncoder { + /// 编码单个 u32 quad fn encode_quad(&mut self, quad: u32, out_buffer: &mut WriteBuffer, 
signature: &mut WriteSignature); -} \ No newline at end of file + + /// 批量编码 u32 quads,支持 RVV 或标量实现 + fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature); +} From bdb4aeb9774d9159ed116c4f0fc7b74e422a65d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=9B=A8=E5=B0=8F=E6=B0=B4?= <792179245@qq.com> Date: Wed, 20 Aug 2025 17:21:47 +0800 Subject: [PATCH 3/8] fix errors Co-authored-by: gong-flying --- src/algorithms/cheetah/cheetah.rs | 10 ++++++++-- src/algorithms/lion/lion.rs | 10 ++++++++-- src/lib.rs | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/algorithms/cheetah/cheetah.rs b/src/algorithms/cheetah/cheetah.rs index c9cbb00..5adbbb1 100644 --- a/src/algorithms/cheetah/cheetah.rs +++ b/src/algorithms/cheetah/cheetah.rs @@ -13,7 +13,6 @@ use crate::{BIT_SIZE_U16, BIT_SIZE_U32, BYTE_SIZE_U32}; pub(crate) const CHEETAH_HASH_BITS: usize = BIT_SIZE_U16; pub(crate) const CHEETAH_HASH_MULTIPLIER: u32 = 0x9D6EF916; - pub(crate) const FLAG_SIZE_BITS: u8 = 2; pub(crate) const MAP_A_FLAG: u64 = 0x1; pub(crate) const MAP_B_FLAG: u64 = 0x2; @@ -131,6 +130,13 @@ impl QuadEncoder for Cheetah { } self.state.last_hash = hash_u16; } + + #[inline(always)] + fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) { + for &quad in quads { + self.encode_quad(quad, out_buffer, signature); + } + } } impl Decoder for Cheetah { @@ -184,4 +190,4 @@ impl Codec for Cheetah { self.state.chunk_map.fill(ChunkData { chunk_a: 0, chunk_b: 0 }); self.state.prediction_map.fill(PredictionData { next: 0 }); } -} \ No newline at end of file +} diff --git a/src/algorithms/lion/lion.rs b/src/algorithms/lion/lion.rs index 8ad36cb..8da2469 100644 --- a/src/algorithms/lion/lion.rs +++ b/src/algorithms/lion/lion.rs @@ -13,7 +13,6 @@ use crate::{BIT_SIZE_U16, BIT_SIZE_U32, BYTE_SIZE_U16, BYTE_SIZE_U32}; pub(crate) const LION_HASH_BITS: usize = BIT_SIZE_U16; pub(crate) const 
LION_HASH_MULTIPLIER: u32 = 0x9D6EF916; - pub(crate) const FLAG_SIZE_BITS: u8 = 3; pub(crate) const PREDICTED_A_FLAG: u64 = 0x1; pub(crate) const PREDICTED_B_FLAG: u64 = 0x2; @@ -252,6 +251,13 @@ impl QuadEncoder for Lion { self.update_last_hash(hash_u16); } + + #[inline(always)] + fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) { + for &quad in quads { + self.encode_quad(quad, out_buffer, signature); + } + } } impl Decoder for Lion { @@ -333,4 +339,4 @@ impl Codec for Lion { } } } -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 94365aa..b68fd1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,6 @@ +#![feature(riscv_ext_intrinsics)] +#![feature(riscv_target_feature)] + pub mod codec; pub mod algorithms; pub mod buffer; From 50809834f4774321f056958e8c57159f5380883e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=A6=E4=B9=A0=E4=B8=AD=E7=9A=84=E7=89=9B=E9=A9=AC?= <158081477+Dayuxiaoshui@users.noreply.github.com> Date: Wed, 20 Aug 2025 21:06:23 +0800 Subject: [PATCH 4/8] Update codec.rs --- src/codec/codec.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/codec/codec.rs b/src/codec/codec.rs index e844d6c..6fd7637 100644 --- a/src/codec/codec.rs +++ b/src/codec/codec.rs @@ -40,30 +40,24 @@ pub trait Codec: QuadEncoder + Decoder { signature.init(out_buffer.index); out_buffer.skip(Self::signature_significant_bytes()); - // 安全对齐block到u32,用于批处理 let (prefix, u32_block, suffix) = unsafe { block.align_to::<u32>() }; - // 处理不对齐前缀(<4字节,稀有;使用标量或直接push) for bytes in prefix.chunks(BYTE_SIZE_U32) { if bytes.len() == BYTE_SIZE_U32 { let quad = u32::from_le_bytes(bytes.try_into().unwrap()); self.encode_quad(quad, out_buffer, signature); } else { - // 隐式plain flag out_buffer.push(bytes); } } - // 批处理对齐的u32 quads(主要热点路径) self.encode_batch(u32_block, out_buffer, signature); - // 处理后缀(<4字节剩余) for bytes in suffix.chunks(BYTE_SIZE_U32) { if bytes.len() == BYTE_SIZE_U32 { let quad = 
u32::from_le_bytes(bytes.try_into().unwrap()); self.encode_quad(quad, out_buffer, signature); } else { - // 隐式plain flag out_buffer.push(bytes); } } From 54efa9b8cbce72c5427cdbda19faf0a838220470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=A6=E4=B9=A0=E4=B8=AD=E7=9A=84=E7=89=9B=E9=A9=AC?= <158081477+Dayuxiaoshui@users.noreply.github.com> Date: Wed, 20 Aug 2025 21:07:09 +0800 Subject: [PATCH 5/8] Update chameleon.rs Co-authored-by: gong-flying --- src/algorithms/chameleon/chameleon.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/algorithms/chameleon/chameleon.rs b/src/algorithms/chameleon/chameleon.rs index 7fef6d3..b9b8256 100644 --- a/src/algorithms/chameleon/chameleon.rs +++ b/src/algorithms/chameleon/chameleon.rs @@ -1,5 +1,3 @@ -// File: src/chameleon.rs - use crate::algorithms::PLAIN_FLAG; use crate::codec::codec::Codec; use crate::codec::decoder::Decoder; @@ -23,7 +21,6 @@ pub(crate) const PLAIN_PLAIN_FLAGS: u64 = (PLAIN_FLAG << 1) | PLAIN_FLAG; pub(crate) const MAP_PLAIN_FLAGS: u64 = (PLAIN_FLAG << 1) | MAP_FLAG; pub(crate) const PLAIN_MAP_FLAGS: u64 = (MAP_FLAG << 1) | PLAIN_FLAG; // pub(crate) const _MAP_MAP_FLAGS: u64 = (MAP_FLAG << 1) | MAP_FLAG; - pub(crate) const DECODE_TWIN_FLAG_MASK: u64 = 0x3; pub(crate) const DECODE_TWIN_FLAG_MASK_BITS: u8 = 2; pub(crate) const DECODE_FLAG_MASK: u64 = 0x1; @@ -123,8 +120,7 @@ impl QuadEncoder for Chameleon { for i in 0..vl { let single_mask = vslidedown_vx_m_b32(v_mask, i as u32, vl); - hit_arr[i] = vfirst_m_b32(single_mask, 1) != -1; // 如果位 set,则 hit - } + hit_arr[i] = vfirst_m_b32(single_mask, 1) != -1; for i in 0..vl { let quad = quad_arr[i]; From e9501a7032158dcb2e40ba646ea0008b6e1b2b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=9B=A8=E5=B0=8F=E6=B0=B4?= <792179245@qq.com> Date: Wed, 20 Aug 2025 21:31:48 +0800 Subject: [PATCH 6/8] feat:fix error Co-authored-by: gong-flying --- src/algorithms/chameleon/chameleon.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/algorithms/chameleon/chameleon.rs b/src/algorithms/chameleon/chameleon.rs index b9b8256..87e1bf2 100644 --- a/src/algorithms/chameleon/chameleon.rs +++ b/src/algorithms/chameleon/chameleon.rs @@ -121,7 +121,7 @@ impl QuadEncoder for Chameleon { for i in 0..vl { let single_mask = vslidedown_vx_m_b32(v_mask, i as u32, vl); hit_arr[i] = vfirst_m_b32(single_mask, 1) != -1; - + } for i in 0..vl { let quad = quad_arr[i]; let hash_u16 = hash_arr[i] as u16; From 7b66862256818b7c65772fcd2e2bdd3381776cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=9B=A8=E5=B0=8F=E6=B0=B4?= <792179245@qq.com> Date: Fri, 22 Aug 2025 21:15:25 +0800 Subject: [PATCH 7/8] feat:fix error Co-authored-by: gong-flying --- src/algorithms/chameleon/chameleon.rs | 2 ++ src/lib.rs | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/algorithms/chameleon/chameleon.rs b/src/algorithms/chameleon/chameleon.rs index 87e1bf2..8fad20a 100644 --- a/src/algorithms/chameleon/chameleon.rs +++ b/src/algorithms/chameleon/chameleon.rs @@ -9,6 +9,8 @@ use crate::io::read_signature::ReadSignature; use crate::io::write_buffer::WriteBuffer; use crate::io::write_signature::WriteSignature; use crate::{BIT_SIZE_U16, BIT_SIZE_U32, BYTE_SIZE_U32}; + +#[cfg(all(target_arch = "riscv64", target_feature = "v"))] use std::arch::riscv64::*; pub(crate) const CHAMELEON_HASH_BITS: usize = BIT_SIZE_U16; diff --git a/src/lib.rs b/src/lib.rs index b68fd1b..74aa95a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ -#![feature(riscv_ext_intrinsics)] -#![feature(riscv_target_feature)] +#![cfg_attr(all(target_arch = "riscv64", target_feature = "v"), feature(riscv_ext_intrinsics))] +#![cfg_attr(all(target_arch = "riscv64", target_feature = "v"), feature(riscv_target_feature))] pub mod codec; pub mod algorithms; From b2e231f0955cddf223dd91ccc666b4e613614816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=9B=A8=E5=B0=8F=E6=B0=B4?= <792179245@qq.com> Date: Fri, 29 Aug 2025 18:37:43 
+0800 Subject: [PATCH 8/8] feat:fix error Co-authored-by: gong-flying --- benchmark.log | 56 --------------------------- src/algorithms/chameleon/chameleon.rs | 15 ++++--- src/codec/codec.rs | 38 +++++++++--------- src/lib.rs | 3 +- 4 files changed, 31 insertions(+), 81 deletions(-) delete mode 100644 benchmark.log diff --git a/benchmark.log b/benchmark.log deleted file mode 100644 index 3d9f074..0000000 --- a/benchmark.log +++ /dev/null @@ -1,56 +0,0 @@ -cargo bench - Finished `bench` profile [optimized] target(s) in 0.27s - Running unittests src/lib.rs (target/release/deps/density_rs-d3297b9d2331d177) - -running 3 tests -test tests::chameleon ... ignored -test tests::cheetah ... ignored -test tests::lion ... ignored - -test result: ok. 0 passed; 0 failed; 3 ignored; 0 measured; 0 filtered out; finished in 0.00s - - Running benches/density.rs (target/release/deps/density-337b4b824fbad157) -Using file ./benches/data/dickens.txt (10192446 bytes) -Timer precision: 41 ns -density fastest │ slowest │ median │ mean │ samples │ iters -├─ chameleon │ │ │ │ │ -│ ├─ compress/raw (1.749x) 4.606 ms │ 5.252 ms │ 4.725 ms │ 4.742 ms │ 25 │ 25 -│ │ 2.212 GB/s │ 1.94 GB/s │ 2.156 GB/s │ 2.149 GB/s │ │ -│ ╰─ decompress/raw 3.397 ms │ 3.567 ms │ 3.452 ms │ 3.456 ms │ 25 │ 25 -│ 3 GB/s │ 2.856 GB/s │ 2.952 GB/s │ 2.949 GB/s │ │ -├─ cheetah │ │ │ │ │ -│ ├─ compress/raw (1.860x) 8.388 ms │ 8.854 ms │ 8.556 ms │ 8.551 ms │ 25 │ 25 -│ │ 1.215 GB/s │ 1.151 GB/s │ 1.191 GB/s │ 1.191 GB/s │ │ -│ ╰─ decompress/raw 5.781 ms │ 6.257 ms │ 5.882 ms │ 5.894 ms │ 25 │ 25 -│ 1.762 GB/s │ 1.628 GB/s │ 1.732 GB/s │ 1.729 GB/s │ │ -╰─ lion │ │ │ │ │ - ├─ compress/raw (1.966x) 14.42 ms │ 14.79 ms │ 14.55 ms │ 14.55 ms │ 25 │ 25 - │ 706.5 MB/s │ 689.1 MB/s │ 700.4 MB/s │ 700.2 MB/s │ │ - ╰─ decompress/raw 9.31 ms │ 9.787 ms │ 9.469 ms │ 9.483 ms │ 25 │ 25 - 1.094 GB/s │ 1.041 GB/s │ 1.076 GB/s │ 1.074 GB/s │ │ - - Running benches/lz4.rs (target/release/deps/lz4-9c50a6cd5b53e994) -Using file 
./benches/data/dickens.txt (10192446 bytes) -Timer precision: 41 ns -lz4 fastest │ slowest │ median │ mean │ samples │ iters -╰─ default │ │ │ │ │ - ├─ compress/raw (1.585x) 21.41 ms │ 22.37 ms │ 21.79 ms │ 21.79 ms │ 25 │ 25 - │ 476 MB/s │ 455.5 MB/s │ 467.6 MB/s │ 467.5 MB/s │ │ - ╰─ decompress/raw 3.405 ms │ 3.667 ms │ 3.436 ms │ 3.465 ms │ 25 │ 25 - 2.993 GB/s │ 2.778 GB/s │ 2.966 GB/s │ 2.94 GB/s │ │ - - Running benches/snappy.rs (target/release/deps/snappy-33d1f219f1371d73) -Using file ./benches/data/dickens.txt (10192446 bytes) -Timer precision: 41 ns -snappy fastest │ slowest │ median │ mean │ samples │ iters -╰─ default │ │ │ │ │ - ├─ compress/stream (1.607x) 28.59 ms │ 29.17 ms │ 28.87 ms │ 28.88 ms │ 25 │ 25 - │ 356.4 MB/s │ 349.3 MB/s │ 352.9 MB/s │ 352.8 MB/s │ │ - ╰─ decompress/stream 12.95 ms │ 13.64 ms │ 13.16 ms │ 13.17 ms │ 25 │ 25 - 786.6 MB/s │ 746.9 MB/s │ 774 MB/s │ 773.7 MB/s │ │ - - Running benches/utils.rs (target/release/deps/utils-0441cb69e0fcfbda) - -running 0 tests - -test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s \ No newline at end of file diff --git a/src/algorithms/chameleon/chameleon.rs b/src/algorithms/chameleon/chameleon.rs index 8fad20a..8d44655 100644 --- a/src/algorithms/chameleon/chameleon.rs +++ b/src/algorithms/chameleon/chameleon.rs @@ -74,14 +74,17 @@ impl QuadEncoder for Chameleon { fn encode_quad(&mut self, quad: u32, out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) { let hash_u16 = (quad.wrapping_mul(CHAMELEON_HASH_MULTIPLIER) >> (BIT_SIZE_U32 - CHAMELEON_HASH_BITS)) as u16; let dictionary_value = &mut self.state.chunk_map[hash_u16 as usize]; - if *dictionary_value != quad { + + // 检查字典命中 + if *dictionary_value == quad { + // 字典命中,输出哈希引用 + signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS); + out_buffer.push(&hash_u16.to_le_bytes()); + } else { + // 字典未命中,输出原始数据并更新字典 signature.push_bits(PLAIN_FLAG, FLAG_SIZE_BITS); out_buffer.push(&quad.to_le_bytes()); - *dictionary_value = quad; - } else { - signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS); - out_buffer.push(&hash_u16.to_le_bytes()); } } @@ -95,6 +98,8 @@ impl QuadEncoder for Chameleon { return; } + // ... existing code ... 
+ #[cfg(all(target_arch = "riscv64", target_feature = "v"))] unsafe { let num_quads = quads.len(); diff --git a/src/codec/codec.rs b/src/codec/codec.rs index 6fd7637..0088624 100644 --- a/src/codec/codec.rs +++ b/src/codec/codec.rs @@ -7,7 +7,7 @@ use crate::io::read_buffer::ReadBuffer; use crate::io::read_signature::ReadSignature; use crate::io::write_buffer::WriteBuffer; use crate::io::write_signature::WriteSignature; -use crate::{BYTE_SIZE_U128, BYTE_SIZE_U32}; +use crate::BYTE_SIZE_U32; pub trait Codec: QuadEncoder + Decoder { fn block_size() -> usize; @@ -40,26 +40,28 @@ pub trait Codec: QuadEncoder + Decoder { signature.init(out_buffer.index); out_buffer.skip(Self::signature_significant_bytes()); - let (prefix, u32_block, suffix) = unsafe { block.align_to::<u32>() }; - - for bytes in prefix.chunks(BYTE_SIZE_U32) { - if bytes.len() == BYTE_SIZE_U32 { - let quad = u32::from_le_bytes(bytes.try_into().unwrap()); - self.encode_quad(quad, out_buffer, signature); + // 统一处理所有数据为小端序的quads + let mut all_quads = Vec::new(); + let mut remaining_bytes = Vec::new(); + + for chunk in block.chunks(BYTE_SIZE_U32) { + if chunk.len() == BYTE_SIZE_U32 { + let quad = u32::from_le_bytes(chunk.try_into().unwrap()); + all_quads.push(quad); } else { - out_buffer.push(bytes); + // 收集不完整的字节,稍后处理 + remaining_bytes.extend_from_slice(chunk); } } - - self.encode_batch(u32_block, out_buffer, signature); - - for bytes in suffix.chunks(BYTE_SIZE_U32) { - if bytes.len() == BYTE_SIZE_U32 { - let quad = u32::from_le_bytes(bytes.try_into().unwrap()); - self.encode_quad(quad, out_buffer, signature); - } else { - out_buffer.push(bytes); - } + + // 先处理所有完整的quads + if !all_quads.is_empty() { + self.encode_batch(&all_quads, out_buffer, signature); + } + + // 最后处理剩余的不完整字节 + if !remaining_bytes.is_empty() { + out_buffer.push(&remaining_bytes); } Self::write_signature(out_buffer, signature); diff --git a/src/lib.rs b/src/lib.rs index 74aa95a..4accde7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,5 +86,4 @@ 
mod tests { Err(_) => { assert!(false); } } } -} - +} \ No newline at end of file