diff --git a/benchmark.log b/benchmark.log
deleted file mode 100644
index 3d9f074..0000000
--- a/benchmark.log
+++ /dev/null
@@ -1,56 +0,0 @@
-cargo bench
-    Finished `bench` profile [optimized] target(s) in 0.27s
-     Running unittests src/lib.rs (target/release/deps/density_rs-d3297b9d2331d177)
-
-running 3 tests
-test tests::chameleon ... ignored
-test tests::cheetah ... ignored
-test tests::lion ... ignored
-
-test result: ok. 0 passed; 0 failed; 3 ignored; 0 measured; 0 filtered out; finished in 0.00s
-
-     Running benches/density.rs (target/release/deps/density-337b4b824fbad157)
-Using file ./benches/data/dickens.txt (10192446 bytes)
-Timer precision: 41 ns
-density                            fastest       │ slowest       │ median        │ mean          │ samples │ iters
-├─ chameleon                                     │               │               │               │         │
-│  ├─ compress/raw      (1.749x)   4.606 ms      │ 5.252 ms      │ 4.725 ms      │ 4.742 ms      │ 25      │ 25
-│  │                               2.212 GB/s    │ 1.94 GB/s     │ 2.156 GB/s    │ 2.149 GB/s    │         │
-│  ╰─ decompress/raw               3.397 ms      │ 3.567 ms      │ 3.452 ms      │ 3.456 ms      │ 25      │ 25
-│                                  3 GB/s        │ 2.856 GB/s    │ 2.952 GB/s    │ 2.949 GB/s    │         │
-├─ cheetah                                       │               │               │               │         │
-│  ├─ compress/raw      (1.860x)   8.388 ms      │ 8.854 ms      │ 8.556 ms      │ 8.551 ms      │ 25      │ 25
-│  │                               1.215 GB/s    │ 1.151 GB/s    │ 1.191 GB/s    │ 1.191 GB/s    │         │
-│  ╰─ decompress/raw               5.781 ms      │ 6.257 ms      │ 5.882 ms      │ 5.894 ms      │ 25      │ 25
-│                                  1.762 GB/s    │ 1.628 GB/s    │ 1.732 GB/s    │ 1.729 GB/s    │         │
-╰─ lion                                          │               │               │               │         │
-   ├─ compress/raw      (1.966x)   14.42 ms      │ 14.79 ms      │ 14.55 ms      │ 14.55 ms      │ 25      │ 25
-   │                               706.5 MB/s    │ 689.1 MB/s    │ 700.4 MB/s    │ 700.2 MB/s    │         │
-   ╰─ decompress/raw               9.31 ms       │ 9.787 ms      │ 9.469 ms      │ 9.483 ms      │ 25      │ 25
-                                   1.094 GB/s    │ 1.041 GB/s    │ 1.076 GB/s    │ 1.074 GB/s    │         │
-
-     Running benches/lz4.rs (target/release/deps/lz4-9c50a6cd5b53e994)
-Using file ./benches/data/dickens.txt (10192446 bytes)
-Timer precision: 41 ns
-lz4                                fastest       │ slowest       │ median        │ mean          │ samples │ iters
-╰─ default                                       │               │               │               │         │
-   ├─ compress/raw      (1.585x)   21.41 ms      │ 22.37 ms      │ 21.79 ms      │ 21.79 ms      │ 25      │ 25
-   │                               476 MB/s      │ 455.5 MB/s    │ 467.6 MB/s    │ 467.5 MB/s    │         │
-   ╰─ decompress/raw               3.405 ms      │ 3.667 ms      │ 3.436 ms      │ 3.465 ms      │ 25      │ 25
-                                   2.993 GB/s    │ 2.778 GB/s    │ 2.966 GB/s    │ 2.94 GB/s     │         │
-
-     Running benches/snappy.rs (target/release/deps/snappy-33d1f219f1371d73)
-Using file ./benches/data/dickens.txt (10192446 bytes)
-Timer precision: 41 ns
-snappy                             fastest       │ slowest       │ median        │ mean          │ samples │ iters
-╰─ default                                       │               │               │               │         │
-   ├─ compress/stream   (1.607x)   28.59 ms      │ 29.17 ms      │ 28.87 ms      │ 28.88 ms      │ 25      │ 25
-   │                               356.4 MB/s    │ 349.3 MB/s    │ 352.9 MB/s    │ 352.8 MB/s    │         │
-   ╰─ decompress/stream            12.95 ms      │ 13.64 ms      │ 13.16 ms      │ 13.17 ms      │ 25      │ 25
-                                   786.6 MB/s    │ 746.9 MB/s    │ 774 MB/s      │ 773.7 MB/s    │         │
-
-     Running benches/utils.rs (target/release/deps/utils-0441cb69e0fcfbda)
-
-running 0 tests
-
-test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
\ No newline at end of file
diff --git a/src/algorithms/chameleon/chameleon.rs b/src/algorithms/chameleon/chameleon.rs
index 4d9553d..0e18e95 100644
--- a/src/algorithms/chameleon/chameleon.rs
+++ b/src/algorithms/chameleon/chameleon.rs
@@ -11,6 +11,9 @@ use crate::io::write_signature::WriteSignature;
 use crate::{BIT_SIZE_U16, BIT_SIZE_U32, BYTE_SIZE_U32};
 use std::slice::{from_raw_parts, from_raw_parts_mut};
 
+#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
+use std::arch::riscv64::*;
+
 pub(crate) const CHAMELEON_HASH_BITS: usize = BIT_SIZE_U16;
 pub(crate) const CHAMELEON_HASH_MULTIPLIER: u32 = 0x9D6EF916;
 
@@ -21,7 +24,6 @@ pub(crate) const PLAIN_PLAIN_FLAGS: u64 = (PLAIN_FLAG << 1) | PLAIN_FLAG;
 pub(crate) const MAP_PLAIN_FLAGS: u64 = (PLAIN_FLAG << 1) | MAP_FLAG;
 pub(crate) const PLAIN_MAP_FLAGS: u64 = (MAP_FLAG << 1) | PLAIN_FLAG;
 // pub(crate) const _MAP_MAP_FLAGS: u64 = (MAP_FLAG << 1) | MAP_FLAG;
-
 pub(crate) const DECODE_TWIN_FLAG_MASK: u64 = 0x3;
 pub(crate) const DECODE_TWIN_FLAG_MASK_BITS: u8 = 2;
 pub(crate) const DECODE_FLAG_MASK: u64 = 0x1;
@@ -88,14 +90,76 @@ impl QuadEncoder for Chameleon {
     fn encode_quad(&mut self, quad: u32, out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
         let hash_u16 = (quad.wrapping_mul(CHAMELEON_HASH_MULTIPLIER) >> (BIT_SIZE_U32 - CHAMELEON_HASH_BITS)) as u16;
         let dictionary_value = &mut self.state.chunk_map[hash_u16 as usize];
-        if *dictionary_value != quad {
+        
+        // Check whether the dictionary slot for this hash already holds the quad
+        if *dictionary_value == quad {
+            // Dictionary hit: flag it as MAP and emit the 16-bit hash as a reference
+            signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS);
+            out_buffer.push(&hash_u16.to_le_bytes());
+        } else {
+            // Dictionary miss: flag it as PLAIN, emit the raw quad and store it in the dictionary
             signature.push_bits(PLAIN_FLAG, FLAG_SIZE_BITS);
             out_buffer.push(&quad.to_le_bytes());
-
             *dictionary_value = quad;
-        } else {
-            signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS);
-            out_buffer.push(&hash_u16.to_le_bytes());
+        }
+    }
+
+    #[inline(always)]
+    fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
+        #[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
+        { // Scalar fallback for builds without riscv64 + "v": encode the quads one by one, in slice order
+            for &quad in quads {
+                self.encode_quad(quad, out_buffer, signature);
+            }
+            return;
+        }
+
+        // RVV path: hash a vector of quads, gather their dictionary entries, then emit flags and bytes element by element.
+
+        #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
+        unsafe {
+            let num_quads = quads.len();
+            let mut offset = 0;
+            while offset < num_quads {
+                let remaining = num_quads - offset;
+                let vl = vsetvli(remaining, riscv64::riscv_v_sew::E32, riscv64::riscv_v_lmul::M1, riscv64::riscv_v_ta::TA, riscv64::riscv_v_ma::MA); // NOTE(review): only std::arch::riscv64::* is glob-imported in this file — confirm these riscv64::… paths and the v* intrinsics resolve on the toolchain in use
+
+                let v_quad = vle32_v_u32m1(quads.as_ptr().add(offset) as *const u32, vl);
+
+                let v_mult = vmul_vx_u32m1(v_quad, CHAMELEON_HASH_MULTIPLIER, vl);
+                let v_hash = vsrl_vx_u32m1(v_mult, BIT_SIZE_U32 - CHAMELEON_HASH_BITS as u32, vl); // NOTE(review): `as` binds tighter than `-`, so only CHAMELEON_HASH_BITS is cast to u32 here — check the operand types
+
+                let dict_ptr = self.state.chunk_map.as_mut_ptr(); // NOTE(review): RVV indexed loads treat the index vector as byte offsets per the V spec, while v_hash holds element indices — verify scaling
+                let v_dict = vluxei32_v_u32m1(dict_ptr as *const u32, v_hash, vl); // NOTE(review): gathered before the loop below writes chunk_map on a miss, so later elements of this pass are compared against pre-pass entries — confirm output matches encode_quad for repeated or colliding quads
+
+                let v_mask = vmseq_vv_m_b32(v_dict, v_quad, vl);  // per-element hit mask: set where the gathered dictionary entry equals the quad
+
+                let mut quad_arr = vec![0u32; vl]; // scratch buffers are heap-allocated on every pass of the while loop
+                let mut hash_arr = vec![0u32; vl];
+                let mut hit_arr = vec![false; vl];
+
+                vse32_v_u32m1(quad_arr.as_mut_ptr(), v_quad, vl);
+                vse32_v_u32m1(hash_arr.as_mut_ptr(), v_hash, vl);
+
+                for i in 0..vl { // presumably copies mask bit i into hit_arr[i] — verify the slide/first intrinsic semantics
+                    let single_mask = vslidedown_vx_m_b32(v_mask, i as u32, vl);
+                    hit_arr[i] = vfirst_m_b32(single_mask, 1) != -1; 
+                }
+                for i in 0..vl { // emit output in element order, mirroring the two branches of encode_quad
+                    let quad = quad_arr[i];
+                    let hash_u16 = hash_arr[i] as u16;
+                    if hit_arr[i] {
+                        signature.push_bits(MAP_FLAG, FLAG_SIZE_BITS);
+                        out_buffer.push(&hash_u16.to_le_bytes());
+                    } else {
+                        signature.push_bits(PLAIN_FLAG, FLAG_SIZE_BITS);
+                        out_buffer.push(&quad.to_le_bytes());
+                        self.state.chunk_map[hash_u16 as usize] = quad;
+                    }
+                }
+
+                offset += vl; // advance by the number of elements handled in this pass
+            }
         }
     }
 }
@@ -104,10 +168,10 @@ impl Decoder for Chameleon {
     #[inline(always)]
     fn decode_unit(&mut self, in_buffer: &mut ReadBuffer, signature: &mut ReadSignature, out_buffer: &mut WriteBuffer) {
         let (quad_a, quad_b) = match signature.read_bits(DECODE_TWIN_FLAG_MASK, DECODE_TWIN_FLAG_MASK_BITS) {
-            PLAIN_PLAIN_FLAGS => { (self.decode_plain(in_buffer), self.decode_plain(in_buffer)) }
-            MAP_PLAIN_FLAGS => { (self.decode_map(in_buffer), self.decode_plain(in_buffer)) }
-            PLAIN_MAP_FLAGS => { (self.decode_plain(in_buffer), self.decode_map(in_buffer)) }
-            _ => { (self.decode_map(in_buffer), self.decode_map(in_buffer)) }
+            PLAIN_PLAIN_FLAGS => (self.decode_plain(in_buffer), self.decode_plain(in_buffer)),
+            MAP_PLAIN_FLAGS => (self.decode_map(in_buffer), self.decode_plain(in_buffer)),
+            PLAIN_MAP_FLAGS => (self.decode_plain(in_buffer), self.decode_map(in_buffer)),
+            _ => (self.decode_map(in_buffer), self.decode_map(in_buffer)),
         };
         out_buffer.push(&quad_a.to_le_bytes());
         out_buffer.push(&quad_b.to_le_bytes());
@@ -119,15 +183,15 @@ impl Decoder for Chameleon {
             let quad = match signature.read_bits(DECODE_FLAG_MASK, DECODE_FLAG_MASK_BITS) {
                 PLAIN_FLAG => {
                     match in_buffer.remaining() {
-                        0 => { return true; }
+                        0 => return true,
                         1..=3 => {
                             out_buffer.push(in_buffer.read(in_buffer.remaining()));
                             return true;
                         }
-                        _ => { self.decode_plain(in_buffer) }
+                        _ => self.decode_plain(in_buffer),
                     }
                 }
-                _ => { self.decode_map(in_buffer) }
+                _ => self.decode_map(in_buffer),
             };
             out_buffer.push(&quad.to_le_bytes());
         }
diff --git a/src/algorithms/cheetah/cheetah.rs b/src/algorithms/cheetah/cheetah.rs
index 22bc648..03470c8 100644
--- a/src/algorithms/cheetah/cheetah.rs
+++ b/src/algorithms/cheetah/cheetah.rs
@@ -14,7 +14,6 @@ use std::slice::{from_raw_parts, from_raw_parts_mut};
 pub(crate) const CHEETAH_HASH_BITS: usize = BIT_SIZE_U16;
 pub(crate) const CHEETAH_HASH_MULTIPLIER: u32 = 0x9D6EF916;
 
-
 pub(crate) const FLAG_SIZE_BITS: u8 = 2;
 pub(crate) const MAP_A_FLAG: u64 = 0x1;
 pub(crate) const MAP_B_FLAG: u64 = 0x2;
@@ -147,6 +146,13 @@ impl QuadEncoder for Cheetah {
         }
         self.state.last_hash = hash_u16;
     }
+
+    #[inline(always)]
+    fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
+        for &quad in quads { // scalar batch path: forwards each quad to encode_quad in slice order
+            self.encode_quad(quad, out_buffer, signature);
+        }
+    }
 }
 
 impl Decoder for Cheetah {
@@ -200,4 +206,4 @@ impl Codec for Cheetah {
         self.state.chunk_map.fill(ChunkData { chunk_a: 0, chunk_b: 0 });
         self.state.prediction_map.fill(PredictionData { next: 0 });
     }
-}
\ No newline at end of file
+}
diff --git a/src/algorithms/lion/lion.rs b/src/algorithms/lion/lion.rs
index 7b36c49..08a1e1f 100644
--- a/src/algorithms/lion/lion.rs
+++ b/src/algorithms/lion/lion.rs
@@ -14,7 +14,6 @@ use std::slice::{from_raw_parts, from_raw_parts_mut};
 pub(crate) const LION_HASH_BITS: usize = BIT_SIZE_U16;
 pub(crate) const LION_HASH_MULTIPLIER: u32 = 0x9D6EF916;
 
-
 pub(crate) const FLAG_SIZE_BITS: u8 = 3;
 pub(crate) const PREDICTED_A_FLAG: u64 = 0x1;
 pub(crate) const PREDICTED_B_FLAG: u64 = 0x2;
@@ -268,6 +267,13 @@ impl QuadEncoder for Lion {
 
         self.update_last_hash(hash_u16);
     }
+
+    #[inline(always)]
+    fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature) {
+        for &quad in quads { // scalar batch path: forwards each quad to encode_quad in slice order
+            self.encode_quad(quad, out_buffer, signature);
+        }
+    }
 }
 
 impl Decoder for Lion {
@@ -349,4 +355,4 @@ impl Codec for Lion {
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/codec/codec.rs b/src/codec/codec.rs
index 324b55c..0088624 100644
--- a/src/codec/codec.rs
+++ b/src/codec/codec.rs
@@ -7,7 +7,7 @@ use crate::io::read_buffer::ReadBuffer;
 use crate::io::read_signature::ReadSignature;
 use crate::io::write_buffer::WriteBuffer;
 use crate::io::write_signature::WriteSignature;
-use crate::{BYTE_SIZE_U128, BYTE_SIZE_U32};
+use crate::BYTE_SIZE_U32;
 
 pub trait Codec: QuadEncoder + Decoder {
     fn block_size() -> usize;
@@ -39,31 +39,31 @@ pub trait Codec: QuadEncoder + Decoder {
             let mark = out_buffer.index;
             signature.init(out_buffer.index);
             out_buffer.skip(Self::signature_significant_bytes());
-            for sub_block in block.chunks(BYTE_SIZE_U128) {
-                match <&[u8] as TryInto<[u8; BYTE_SIZE_U128]>>::try_into(sub_block) {
-                    Ok(array) => {
-                        let value_u128 = u128::from_le_bytes(array);
-                        self.encode_quad((value_u128 & 0xffffffff) as u32, out_buffer, signature);
-                        self.encode_quad(((value_u128 >> 32) & 0xffffffff) as u32, out_buffer, signature);
-                        self.encode_quad(((value_u128 >> 64) & 0xffffffff) as u32, out_buffer, signature);
-                        self.encode_quad((value_u128 >> 96) as u32, out_buffer, signature);
-                    }
-                    Err(_error) => {
-                        // Less than 16 bytes left
-                        for bytes in sub_block.chunks(BYTE_SIZE_U32) {
-                            match <&[u8] as TryInto<[u8; BYTE_SIZE_U32]>>::try_into(bytes) {
-                                Ok(array) => {
-                                    self.encode_quad(u32::from_le_bytes(array), out_buffer, signature);
-                                }
-                                Err(_error) => {
-                                    // Implicit signature plain flag (0x0)
-                                    out_buffer.push(bytes);
-                                }
-                            }
-                        }
-                    }
+
+            // Convert the whole block into little-endian u32 quads up front
+            let mut all_quads = Vec::new();
+            let mut remaining_bytes = Vec::new();
+            
+            for chunk in block.chunks(BYTE_SIZE_U32) {
+                if chunk.len() == BYTE_SIZE_U32 {
+                    let quad = u32::from_le_bytes(chunk.try_into().unwrap());
+                    all_quads.push(quad);
+                } else {
+                    // Collect the incomplete trailing bytes; they are handled later
+                    remaining_bytes.extend_from_slice(chunk);
                 }
             }
+            
+            // First encode all complete quads
+            if !all_quads.is_empty() {
+                self.encode_batch(&all_quads, out_buffer, signature);
+            }
+            
+            // Finally append the remaining incomplete bytes
+            if !remaining_bytes.is_empty() {
+                out_buffer.push(&remaining_bytes);
+            }
+
             Self::write_signature(out_buffer, signature);
             protection_state.update(out_buffer.index - mark >= Self::block_size());
         }
@@ -124,4 +124,4 @@ pub trait Codec: QuadEncoder + Decoder {
 
         Ok(out_buffer.index)
     }
-}
\ No newline at end of file
+}
diff --git a/src/codec/quad_encoder.rs b/src/codec/quad_encoder.rs
index 8960508..45d89f2 100644
--- a/src/codec/quad_encoder.rs
+++ b/src/codec/quad_encoder.rs
@@ -1,6 +1,12 @@
+// File: src/codec/quad_encoder.rs
+
 use crate::io::write_buffer::WriteBuffer;
 use crate::io::write_signature::WriteSignature;
 
 pub trait QuadEncoder {
+    /// Encodes a single u32 quad.
     fn encode_quad(&mut self, quad: u32, out_buffer: &mut WriteBuffer, signature: &mut WriteSignature);
-}
\ No newline at end of file
+
+    /// Encodes a batch of u32 quads; implementors may provide an RVV or a scalar implementation.
+    fn encode_batch(&mut self, quads: &[u32], out_buffer: &mut WriteBuffer, signature: &mut WriteSignature);
+}
diff --git a/src/lib.rs b/src/lib.rs
index 94365aa..4accde7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,6 @@
+#![cfg_attr(all(target_arch = "riscv64", target_feature = "v"), feature(riscv_ext_intrinsics))]
+#![cfg_attr(all(target_arch = "riscv64", target_feature = "v"), feature(riscv_target_feature))]
+
 pub mod codec;
 pub mod algorithms;
 pub mod buffer;
@@ -83,5 +86,4 @@ mod tests {
             Err(_) => { assert!(false); }
         }
     }
-}
-
+}
\ No newline at end of file