Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 0 additions & 56 deletions benchmark.log

This file was deleted.

90 changes: 77 additions & 13 deletions src/algorithms/chameleon/chameleon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ use crate::io::write_signature::WriteSignature;
use crate::{BIT_SIZE_U16, BIT_SIZE_U32, BYTE_SIZE_U32};
use std::slice::{from_raw_parts, from_raw_parts_mut};

#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
use std::arch::riscv64::*;

pub(crate) const CHAMELEON_HASH_BITS: usize = BIT_SIZE_U16;
pub(crate) const CHAMELEON_HASH_MULTIPLIER: u32 = 0x9D6EF916;

Expand All @@ -21,7 +24,6 @@ pub(crate) const PLAIN_PLAIN_FLAGS: u64 = (PLAIN_FLAG << 1) | PLAIN_FLAG;
pub(crate) const MAP_PLAIN_FLAGS: u64 = (PLAIN_FLAG << 1) | MAP_FLAG;
pub(crate) const PLAIN_MAP_FLAGS: u64 = (MAP_FLAG << 1) | PLAIN_FLAG;
// pub(crate) const _MAP_MAP_FLAGS: u64 = (MAP_FLAG << 1) | MAP_FLAG;

pub(crate) const DECODE_TWIN_FLAG_MASK: u64 = 0x3;
pub(crate) const DECODE_TWIN_FLAG_MASK_BITS: u8 = 2;
pub(crate) const DECODE_FLAG_MASK: u64 = 0x1;
Expand Down Expand Up @@ -88,14 +90,76 @@ impl QuadEncoder for Chameleon {
fn encode_quad(&mut self, quad: u32, out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
let hash_u16 = (quad.wrapping_mul(CHAMELEON_HASH_MULTIPLIER) >> (BIT_SIZE_U32 - CHAMELEON_HASH_BITS)) as u16;
let dictionary_value = &mut self.state.chunk_map[hash_u16 as usize];
if *dictionary_value != quad {

// 检查字典命中
if *dictionary_value == quad {
// 字典命中,输出哈希引用
signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS);
out_buffer.push(&hash_u16.to_le_bytes());
} else {
// 字典未命中,输出原始数据并更新字典
signature.push_bits(PLAIN_FLAG, FLAG_SIZE_BITS);
out_buffer.push(&quad.to_le_bytes());

*dictionary_value = quad;
} else {
signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS);
out_buffer.push(&hash_u16.to_le_bytes());
}
}

#[inline(always)]
fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
{
for &quad in quads {
self.encode_quad(quad, out_buffer, signature);
}
return;
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my tests during development, I found that splitting the block into 128-bit sub-blocks and then extracting 4 quads using shift-masking was faster than iterating over each quad as you do here - see the initial codec.rs file for that:

for sub_block in block.chunks(BYTE_SIZE_U128) {
match <&[u8] as TryInto<[u8; BYTE_SIZE_U128]>>::try_into(sub_block) {

I suspect this is because the Rust compiler could easily optimize the code (sequential self.encode_quad() calls).

}

// ... existing code ...

#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
{
    // Only the hash computation is vectorized. The dictionary lookup must stay
    // sequential: every miss rewrites a dictionary slot, and a later quad in the
    // same batch may hash to that slot. Gathering all slots up front would let a
    // lane observe a stale value and emit a different flag than `encode_quad`
    // would for the same input order.
    let num_quads = quads.len();
    // Reused across iterations; `resize` only allocates while the length grows.
    let mut hash_buf: Vec<u32> = Vec::new();
    let mut offset = 0;
    while offset < num_quads {
        let remaining = num_quads - offset;

        // SAFETY: the load reads `vl` elements starting at `quads[offset]` and the
        // store writes `vl` elements into `hash_buf`, which is resized to `vl`
        // first. This relies on `vsetvli` returning `vl <= remaining`
        // (NOTE(review): confirm against the intrinsic's contract); the slice
        // below re-checks that bound before any output is produced.
        let vl = unsafe {
            let vl = vsetvli(remaining, riscv64::riscv_v_sew::E32, riscv64::riscv_v_lmul::M1, riscv64::riscv_v_ta::TA, riscv64::riscv_v_ma::MA);
            hash_buf.resize(vl, 0);

            let v_quad = vle32_v_u32m1(quads.as_ptr().add(offset), vl);
            let v_mult = vmul_vx_u32m1(v_quad, CHAMELEON_HASH_MULTIPLIER, vl);
            // Parenthesized so the subtraction happens in `usize` before the cast.
            let v_hash = vsrl_vx_u32m1(v_mult, (BIT_SIZE_U32 - CHAMELEON_HASH_BITS) as u32, vl);
            vse32_v_u32m1(hash_buf.as_mut_ptr(), v_hash, vl);
            vl
        };

        // Same hit/miss handling as `encode_quad`, using the precomputed hashes.
        for (&quad, &hash) in quads[offset..offset + vl].iter().zip(&hash_buf[..vl]) {
            let hash_u16 = hash as u16;
            let dictionary_value = &mut self.state.chunk_map[hash_u16 as usize];
            if *dictionary_value == quad {
                signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS);
                out_buffer.push(&hash_u16.to_le_bytes());
            } else {
                signature.push_bits(PLAIN_FLAG, FLAG_SIZE_BITS);
                out_buffer.push(&quad.to_le_bytes());
                *dictionary_value = quad;
            }
        }

        offset += vl;
    }
}
}
}
Expand All @@ -104,10 +168,10 @@ impl Decoder for Chameleon {
#[inline(always)]
fn decode_unit(&mut self, in_buffer: &mut ReadBuffer, signature: &mut ReadSignature, out_buffer: &mut WriteBuffer) {
let (quad_a, quad_b) = match signature.read_bits(DECODE_TWIN_FLAG_MASK, DECODE_TWIN_FLAG_MASK_BITS) {
PLAIN_PLAIN_FLAGS => { (self.decode_plain(in_buffer), self.decode_plain(in_buffer)) }
MAP_PLAIN_FLAGS => { (self.decode_map(in_buffer), self.decode_plain(in_buffer)) }
PLAIN_MAP_FLAGS => { (self.decode_plain(in_buffer), self.decode_map(in_buffer)) }
_ => { (self.decode_map(in_buffer), self.decode_map(in_buffer)) }
PLAIN_PLAIN_FLAGS => (self.decode_plain(in_buffer), self.decode_plain(in_buffer)),
MAP_PLAIN_FLAGS => (self.decode_map(in_buffer), self.decode_plain(in_buffer)),
PLAIN_MAP_FLAGS => (self.decode_plain(in_buffer), self.decode_map(in_buffer)),
_ => (self.decode_map(in_buffer), self.decode_map(in_buffer)),
};
out_buffer.push(&quad_a.to_le_bytes());
out_buffer.push(&quad_b.to_le_bytes());
Expand All @@ -119,15 +183,15 @@ impl Decoder for Chameleon {
let quad = match signature.read_bits(DECODE_FLAG_MASK, DECODE_FLAG_MASK_BITS) {
PLAIN_FLAG => {
match in_buffer.remaining() {
0 => { return true; }
0 => return true,
1..=3 => {
out_buffer.push(in_buffer.read(in_buffer.remaining()));
return true;
}
_ => { self.decode_plain(in_buffer) }
_ => self.decode_plain(in_buffer),
}
}
_ => { self.decode_map(in_buffer) }
_ => self.decode_map(in_buffer),
};
out_buffer.push(&quad.to_le_bytes());
}
Expand Down
10 changes: 8 additions & 2 deletions src/algorithms/cheetah/cheetah.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use std::slice::{from_raw_parts, from_raw_parts_mut};
pub(crate) const CHEETAH_HASH_BITS: usize = BIT_SIZE_U16;
pub(crate) const CHEETAH_HASH_MULTIPLIER: u32 = 0x9D6EF916;


pub(crate) const FLAG_SIZE_BITS: u8 = 2;
pub(crate) const MAP_A_FLAG: u64 = 0x1;
pub(crate) const MAP_B_FLAG: u64 = 0x2;
Expand Down Expand Up @@ -147,6 +146,13 @@ impl QuadEncoder for Cheetah {
}
self.state.last_hash = hash_u16;
}

/// Encodes every quad of `quads`, in slice order, by delegating each one to
/// `encode_quad` with the same output buffer and signature.
#[inline(always)]
fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
    quads
        .iter()
        .copied()
        .for_each(|quad| self.encode_quad(quad, out_buffer, signature));
}
}

impl Decoder for Cheetah {
Expand Down Expand Up @@ -200,4 +206,4 @@ impl Codec for Cheetah {
self.state.chunk_map.fill(ChunkData { chunk_a: 0, chunk_b: 0 });
self.state.prediction_map.fill(PredictionData { next: 0 });
}
}
}
10 changes: 8 additions & 2 deletions src/algorithms/lion/lion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use std::slice::{from_raw_parts, from_raw_parts_mut};
pub(crate) const LION_HASH_BITS: usize = BIT_SIZE_U16;
pub(crate) const LION_HASH_MULTIPLIER: u32 = 0x9D6EF916;


pub(crate) const FLAG_SIZE_BITS: u8 = 3;
pub(crate) const PREDICTED_A_FLAG: u64 = 0x1;
pub(crate) const PREDICTED_B_FLAG: u64 = 0x2;
Expand Down Expand Up @@ -268,6 +267,13 @@ impl QuadEncoder for Lion {

self.update_last_hash(hash_u16);
}

/// Scalar batch encoder: feeds the quads to `encode_quad` one at a time,
/// front to back, sharing `out_buffer` and `signature` across all calls.
#[inline(always)]
fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
    for quad in quads.iter().copied() {
        self.encode_quad(quad, out_buffer, signature);
    }
}
}

impl Decoder for Lion {
Expand Down Expand Up @@ -349,4 +355,4 @@ impl Codec for Lion {
}
}
}
}
}
50 changes: 25 additions & 25 deletions src/codec/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::io::read_buffer::ReadBuffer;
use crate::io::read_signature::ReadSignature;
use crate::io::write_buffer::WriteBuffer;
use crate::io::write_signature::WriteSignature;
use crate::{BYTE_SIZE_U128, BYTE_SIZE_U32};
use crate::BYTE_SIZE_U32;

pub trait Codec: QuadEncoder + Decoder {
fn block_size() -> usize;
Expand Down Expand Up @@ -39,31 +39,31 @@ pub trait Codec: QuadEncoder + Decoder {
let mark = out_buffer.index;
signature.init(out_buffer.index);
out_buffer.skip(Self::signature_significant_bytes());
for sub_block in block.chunks(BYTE_SIZE_U128) {
match <&[u8] as TryInto<[u8; BYTE_SIZE_U128]>>::try_into(sub_block) {
Ok(array) => {
let value_u128 = u128::from_le_bytes(array);
self.encode_quad((value_u128 & 0xffffffff) as u32, out_buffer, signature);
self.encode_quad(((value_u128 >> 32) & 0xffffffff) as u32, out_buffer, signature);
self.encode_quad(((value_u128 >> 64) & 0xffffffff) as u32, out_buffer, signature);
self.encode_quad((value_u128 >> 96) as u32, out_buffer, signature);
}
Err(_error) => {
// Less than 16 bytes left
for bytes in sub_block.chunks(BYTE_SIZE_U32) {
match <&[u8] as TryInto<[u8; BYTE_SIZE_U32]>>::try_into(bytes) {
Ok(array) => {
self.encode_quad(u32::from_le_bytes(array), out_buffer, signature);
}
Err(_error) => {
// Implicit signature plain flag (0x0)
out_buffer.push(bytes);
}
}
}
}

// 统一处理所有数据为小端序的quads
let mut all_quads = Vec::new();
let mut remaining_bytes = Vec::new();

for chunk in block.chunks(BYTE_SIZE_U32) {
if chunk.len() == BYTE_SIZE_U32 {
let quad = u32::from_le_bytes(chunk.try_into().unwrap());
all_quads.push(quad);
} else {
// 收集不完整的字节,稍后处理
remaining_bytes.extend_from_slice(chunk);
}
}

// 先处理所有完整的quads
if !all_quads.is_empty() {
self.encode_batch(&all_quads, out_buffer, signature);
}

// 最后处理剩余的不完整字节
if !remaining_bytes.is_empty() {
out_buffer.push(&remaining_bytes);
}

Self::write_signature(out_buffer, signature);
protection_state.update(out_buffer.index - mark >= Self::block_size());
}
Expand Down Expand Up @@ -124,4 +124,4 @@ pub trait Codec: QuadEncoder + Decoder {

Ok(out_buffer.index)
}
}
}
8 changes: 7 additions & 1 deletion src/codec/quad_encoder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
// File: src/codec/quad_encoder.rs

use crate::io::write_buffer::WriteBuffer;
use crate::io::write_signature::WriteSignature;

pub trait QuadEncoder {
/// 编码单个 u32 quad
fn encode_quad(&mut self, quad: u32, out_buffer: &mut WriteBuffer, signature: &mut WriteSignature);
}

/// 批量编码 u32 quads,支持 RVV 或标量实现
fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature);
}
6 changes: 4 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#![cfg_attr(all(target_arch = "riscv64", target_feature = "v"), feature(riscv_ext_intrinsics))]
#![cfg_attr(all(target_arch = "riscv64", target_feature = "v"), feature(riscv_target_feature))]

pub mod codec;
pub mod algorithms;
pub mod buffer;
Expand Down Expand Up @@ -83,5 +86,4 @@ mod tests {
Err(_) => { assert!(false); }
}
}
}

}
Loading