Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2a27161
feat(encoder): FSE table reuse and offset history optimization
polaz Mar 25, 2026
f1e162e
docs: fix clippy command in copilot-instructions
polaz Mar 25, 2026
5468134
fix(encoder): align fse repeat state with decoder
polaz Mar 26, 2026
e472885
test(encoder): harden fse regressions
polaz Mar 26, 2026
50812ea
test(interop): add reverse fse regression coverage
polaz Mar 26, 2026
a154c95
test(encoder): add single-symbol table regression
polaz Mar 26, 2026
9dd75e3
fix(encoder): avoid single-symbol fse panic
polaz Mar 26, 2026
dd48ad3
docs(encoder): clarify fse cost comparison
polaz Mar 26, 2026
c08ea51
fix(fse): assert table header alignment\n\n- document that single-sym…
polaz Mar 26, 2026
3316ccf
fix(encoder): harden repeat table state\n\n- avoid none unwrap when r…
polaz Mar 26, 2026
9ff9143
fix(ringbuffer): restore branchless copy path\n\n- wire extend_from_w…
polaz Mar 26, 2026
7b72b57
style(rustfmt): apply workspace formatting\n\n- run cargo fmt --all a…
polaz Mar 26, 2026
103be96
test(coverage): cover edition cleanup paths
polaz Mar 26, 2026
5a61263
fix(encoder): tighten repeat table reuse
polaz Mar 26, 2026
18c0721
test(errors): sharpen regression coverage
polaz Mar 26, 2026
3e8fded
fix(fse): silence bench-only warnings
polaz Mar 26, 2026
bc5b794
build(rust): pin workspace toolchain to 1.94
polaz Mar 26, 2026
5d54817
fix(encoder): avoid invalid fse table fallback
polaz Mar 26, 2026
5b4fd90
ci(rust): install i686 target explicitly
polaz Mar 26, 2026
7e92555
ci(rust): pin workflow toolchain to 1.94
polaz Mar 26, 2026
56c1e6d
test(errors): lock display regressions to exact text
polaz Mar 26, 2026
0854328
docs(ci): clarify toolchain and test intent
polaz Mar 26, 2026
2e6edc5
ci(bench): pin benchmark toolchain to 1.94
polaz Mar 26, 2026
8298996
ci(rust): follow stable and pin msrv
polaz Mar 26, 2026
7262247
fix(zstd): tighten wraparound review fixes
polaz Mar 26, 2026
874b249
test(zstd): stabilize review-driven assertions
polaz Mar 26, 2026
95e929e
test(zstd): narrow zero-ll coverage claim
polaz Mar 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ Pure Rust zstd implementation — managed fork of [ruzstd (KillingSpark/zstd-rs)

## Rust Code Standards

- **Clippy:** Must pass `cargo clippy --all-features -- -D warnings`
- **Clippy:** Must pass `cargo clippy -p structured-zstd --features hash,std,dict_builder -- -D warnings` (`rustc-dep-of-std` is excluded — it's an internal feature for Rust stdlib builds only; `fuzz_exports` is excluded — fuzzing-specific entry points are validated separately from the regular lint gate)
- This is a fork — avoid suggesting architectural changes that diverge too far from upstream
- Performance-critical code: benchmark before/after any changes
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ jobs:
steps:
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@stable
with:
targets: i686-unknown-linux-gnu
- uses: taiki-e/install-action@nextest
- name: Install i686 target
run: rustup target add i686-unknown-linux-gnu
- name: Install 32-bit libs
run: sudo apt-get update && sudo apt-get install -y gcc-multilib
- uses: Swatinem/rust-cache@v2
Expand All @@ -79,7 +79,7 @@ jobs:
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: "1.92"
toolchain: "1.92.0"
- uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ name = "structured-zstd-cli"
version = "0.8.2"
rust-version = "1.92"
authors = ["Moritz Borcherding <moritz.borcherding@web.de>"]
edition = "2018"
edition = "2024"
license = "Apache-2.0"
homepage = "https://github.com/structured-world/structured-zstd"
repository = "https://github.com/structured-world/structured-zstd"
Expand Down
2 changes: 1 addition & 1 deletion cli/src/progress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ mod tests {
assert_eq!(&fmt_duration(Duration::from_secs(5 * 60)), "5m");
assert_eq!(&fmt_duration(Duration::from_secs(3 * 60 * 60)), "3h");
assert_eq!(
&fmt_duration(Duration::from_secs(1 * 60 * 60 + 20 * 60 + 30)),
&fmt_duration(Duration::from_secs(60 * 60 + 20 * 60 + 30)),
"1h 20m 30s"
);
}
Expand Down
5 changes: 5 additions & 0 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[toolchain]
# Follow the latest stable toolchain by default.
# MSRV remains 1.92.0 and is verified separately via `rust-version` plus the CI msrv job.
channel = "stable"
components = ["clippy", "rustfmt"]
2 changes: 1 addition & 1 deletion zstd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ authors = [
"Moritz Borcherding <moritz.borcherding@web.de>",
"Structured World Foundation <foundation@sw.foundation>",
]
edition = "2018"
edition = "2024"
license = "Apache-2.0"
homepage = "https://github.com/structured-world/structured-zstd"
repository = "https://github.com/structured-world/structured-zstd"
Expand Down
2 changes: 1 addition & 1 deletion zstd/benches/compare_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! Five variations: decompress (pure Rust/C FFI), compress (pure Rust/C FFI L1/L3).
//! Both decompress benchmarks allocate per-iteration for symmetric comparison.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};

/// Compressed corpus for decompression benchmarks.
const COMPRESSED_CORPUS: &[u8] = include_bytes!("../decodecorpus_files/z000033.zst");
Expand Down
2 changes: 1 addition & 1 deletion zstd/benches/decode_all.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{Criterion, criterion_group, criterion_main};
use structured_zstd::decoding::FrameDecoder;

fn criterion_benchmark(c: &mut Criterion) {
Expand Down
29 changes: 24 additions & 5 deletions zstd/src/bit_io/bit_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
/// dumping
pub fn dump(mut self) -> V {
if self.misaligned() != 0 {
panic!("`dump` was called on a bit writer but an even number of bytes weren't written into the buffer. Was: {}", self.index())
panic!(
"`dump` was called on a bit writer but an even number of bytes weren't written into the buffer. Was: {}",
self.index()
)
}
self.flush();
debug_assert_eq!(self.partial, 0);
Expand Down Expand Up @@ -248,7 +251,11 @@ mod tests {
bw.write_bits(0b1111u8, 4);
bw.write_bits(0b0000u8, 4);
let output = bw.dump();
assert!(output.len() == 1, "Single byte written into writer returned a vec that wasn't one byte, vec was {} elements long", output.len());
assert!(
output.len() == 1,
"Single byte written into writer returned a vec that wasn't one byte, vec was {} elements long",
output.len()
);
assert_eq!(
0b0000_1111, output[0],
"4 bits and 4 bits written into buffer"
Expand All @@ -262,7 +269,11 @@ mod tests {
bw.write_bits(0b111u8, 3);
bw.write_bits(0b0_0000u8, 5);
let output = bw.dump();
assert!(output.len() == 1, "Single byte written into writer return a vec that wasn't one byte, vec was {} elements long", output.len());
assert!(
output.len() == 1,
"Single byte written into writer return a vec that wasn't one byte, vec was {} elements long",
output.len()
);
assert_eq!(0b0000_0111, output[0], "3 and 5 bits written into buffer");
}

Expand All @@ -273,7 +284,11 @@ mod tests {
bw.write_bits(0b1u8, 1);
bw.write_bits(0u8, 7);
let output = bw.dump();
assert!(output.len() == 1, "Single byte written into writer return a vec that wasn't one byte, vec was {} elements long", output.len());
assert!(
output.len() == 1,
"Single byte written into writer return a vec that wasn't one byte, vec was {} elements long",
output.len()
);
assert_eq!(0b0000_0001, output[0], "1 and 7 bits written into buffer");
}

Expand All @@ -283,7 +298,11 @@ mod tests {
let mut bw = BitWriter::new();
bw.write_bits(1u8, 8);
let output = bw.dump();
assert!(output.len() == 1, "Single byte written into writer return a vec that wasn't one byte, vec was {} elements long", output.len());
assert!(
output.len() == 1,
"Single byte written into writer return a vec that wasn't one byte, vec was {} elements long",
output.len()
);
assert_eq!(1, output[0], "1 and 7 bits written into buffer");
}

Expand Down
6 changes: 4 additions & 2 deletions zstd/src/decoding/block_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl BlockDecoder {
DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
DecoderState::ReadyToDecodeNextHeader => {
return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock);
}
}

Expand Down Expand Up @@ -108,7 +108,9 @@ impl BlockDecoder {
}

BlockType::Reserved => {
panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
panic!(
"How did you even get this. The decoder should error out if it detects a reserved-type block"
);
}

BlockType::Compressed => {
Expand Down
146 changes: 130 additions & 16 deletions zstd/src/decoding/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,9 @@ impl core::fmt::Display for BlockTypeError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
BlockTypeError::InvalidBlocktypeNumber { num } => {
write!(f,
"Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though",
write!(
f,
"Invalid Blocktype number. Is: {num}. Should be one of: 0, 1, 2, 3 (3 is reserved).",
)
Comment thread
polaz marked this conversation as resolved.
}
}
Expand Down Expand Up @@ -291,7 +292,8 @@ impl core::fmt::Display for DecompressBlockError {
expected_len,
remaining_bytes,
} => {
write!(f,
write!(
f,
"Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left",
)
}
Expand Down Expand Up @@ -370,9 +372,10 @@ impl core::fmt::Display for DecodeBlockContentError {
)
}
DecodeBlockContentError::ExpectedHeaderOfPreviousBlock => {
write!(f,
"Can't decode next block body, while expecting to decode the header of the previous block. Results will be nonsense",
)
write!(
f,
"Can't decode next block body, while expecting to decode the header of the previous block. Results will be nonsense",
)
}
DecodeBlockContentError::ReadError { step, source } => {
write!(f, "Error while reading bytes for {step}: {source}",)
Expand Down Expand Up @@ -545,10 +548,16 @@ impl core::fmt::Display for FrameDecoderError {
)
}
FrameDecoderError::TargetTooSmall => {
write!(f, "Target must have at least as many bytes as the contentsize of the frame reports")
write!(
f,
"Target must have at least as many bytes as the content size reported by the frame"
)
}
FrameDecoderError::DictNotProvided { dict_id } => {
write!(f, "Frame header specified dictionary id 0x{dict_id:X} that wasnt provided by add_dict() or reset_with_dict()")
write!(
f,
"Frame header specified dictionary id 0x{dict_id:X} that wasn't provided via add_dict() or reset_with_dict()"
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
}
Expand Down Expand Up @@ -609,12 +618,14 @@ impl core::fmt::Display for DecompressLiteralsError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
DecompressLiteralsError::MissingCompressedSize => {
write!(f,
write!(
f,
"compressed size was none even though it must be set to something for compressed literals",
)
}
DecompressLiteralsError::MissingNumStreams => {
write!(f,
write!(
f,
"num_streams was none even though it must be set to something (1 or 4) for compressed literals",
)
}
Expand All @@ -637,7 +648,8 @@ impl core::fmt::Display for DecompressLiteralsError {
)
}
DecompressLiteralsError::ExtraPadding { skipped_bits } => {
write!(f,
write!(
f,
"Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption",
)
}
Expand Down Expand Up @@ -754,7 +766,8 @@ impl core::fmt::Display for DecodeSequenceError {
DecodeSequenceError::FSEDecoderError(e) => write!(f, "{e:?}"),
DecodeSequenceError::FSETableError(e) => write!(f, "{e:?}"),
DecodeSequenceError::ExtraPadding { skipped_bits } => {
write!(f,
write!(
f,
"Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption",
)
}
Expand Down Expand Up @@ -929,7 +942,8 @@ impl core::fmt::Display for FSETableError {
expected_sum,
symbol_probabilities,
} => {
write!(f,
write!(
f,
"The counter ({got}) exceeded the expected sum: {expected_sum}. This means an error or corrupted data \n {symbol_probabilities:?}",
)
}
Expand Down Expand Up @@ -1047,10 +1061,14 @@ impl core::fmt::Display for HuffmanTableError {
got_bytes,
expected_bytes,
} => {
write!(f, "Header says there should be {expected_bytes} bytes for the weights but there are only {got_bytes} bytes in the stream")
write!(
f,
"Header says there should be {expected_bytes} bytes for the weights but there are only {got_bytes} bytes in the stream"
)
}
HuffmanTableError::ExtraPadding { skipped_bits } => {
write!(f,
write!(
f,
"Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption",
)
}
Expand All @@ -1076,7 +1094,8 @@ impl core::fmt::Display for HuffmanTableError {
used,
available_bytes,
} => {
write!(f,
write!(
f,
"FSE table used more bytes: {used} than were meant to be used for the whole stream of huffman weights ({available_bytes})",
)
}
Expand Down Expand Up @@ -1149,3 +1168,98 @@ impl From<GetBitsError> for HuffmanDecoderError {
Self::GetBitsError(val)
}
}

#[cfg(test)]
mod tests {
use alloc::{string::ToString, vec};

use super::{
BlockTypeError, DecodeBlockContentError, DecodeSequenceError, DecompressBlockError,
DecompressLiteralsError, FSETableError, FrameDecoderError, HuffmanTableError,
};

/// Pins the exact `Display` text of one block-type, one block-decompression,
/// one block-content and one sequence-decoding error.
#[test]
fn block_and_sequence_display_messages_are_specific() {
    // Each entry pairs the rendered message with the literal text it must equal.
    let cases = [
        (
            BlockTypeError::InvalidBlocktypeNumber { num: 7 }.to_string(),
            "Invalid Blocktype number. Is: 7. Should be one of: 0, 1, 2, 3 (3 is reserved).",
        ),
        (
            DecompressBlockError::MalformedSectionHeader {
                expected_len: 12,
                remaining_bytes: 3,
            }
            .to_string(),
            "Malformed section header. Says literals would be this long: 12 but there are only 3 bytes left",
        ),
        (
            DecodeBlockContentError::ExpectedHeaderOfPreviousBlock.to_string(),
            "Can't decode next block body, while expecting to decode the header of the previous block. Results will be nonsense",
        ),
        (
            DecodeSequenceError::ExtraPadding { skipped_bits: 11 }.to_string(),
            "Padding at the end of the sequence_section was more than a byte long: 11 bits. Probably caused by data corruption",
        ),
    ];

    // Checked in declaration order, so the first mismatch reported is the
    // earliest one in the table.
    for (rendered, expected) in cases {
        assert_eq!(rendered, expected);
    }
}

/// Pins the exact `Display` text of the `TargetTooSmall` and
/// `DictNotProvided` frame-decoder errors.
#[test]
fn frame_decoder_display_messages_are_specific() {
    // Each entry pairs the rendered message with the literal text it must equal.
    let cases = [
        (
            FrameDecoderError::TargetTooSmall.to_string(),
            "Target must have at least as many bytes as the content size reported by the frame",
        ),
        (
            // The dictionary id is formatted as upper-case hex with a `0x` prefix.
            FrameDecoderError::DictNotProvided { dict_id: 0xABCD }.to_string(),
            "Frame header specified dictionary id 0xABCD that wasn't provided via add_dict() or reset_with_dict()",
        ),
    ];

    for (rendered, expected) in cases {
        assert_eq!(rendered, expected);
    }
}

/// Pins the exact `Display` text of three literals-decompression errors:
/// missing compressed size, missing stream count, and extra padding.
#[test]
fn literal_display_messages_are_specific() {
    // Each entry pairs the rendered message with the literal text it must equal.
    let cases = [
        (
            DecompressLiteralsError::MissingCompressedSize.to_string(),
            "compressed size was none even though it must be set to something for compressed literals",
        ),
        (
            DecompressLiteralsError::MissingNumStreams.to_string(),
            "num_streams was none even though it must be set to something (1 or 4) for compressed literals",
        ),
        (
            DecompressLiteralsError::ExtraPadding { skipped_bits: 9 }.to_string(),
            "Padding at the end of the sequence_section was more than a byte long: 9 bits. Probably caused by data corruption",
        ),
    ];

    for (rendered, expected) in cases {
        assert_eq!(rendered, expected);
    }
}

/// Pins the exact `Display` text of one FSE table error and three Huffman
/// table errors.
#[test]
fn fse_and_huffman_display_messages_are_specific() {
    // Each entry pairs the rendered message with the literal text it must equal.
    let cases = [
        (
            // The probability list is appended in `Debug` form after a newline.
            FSETableError::ProbabilityCounterMismatch {
                got: 4,
                expected_sum: 3,
                symbol_probabilities: vec![1, -1],
            }
            .to_string(),
            "The counter (4) exceeded the expected sum: 3. This means an error or corrupted data \n [1, -1]",
        ),
        (
            HuffmanTableError::NotEnoughBytesForWeights {
                got_bytes: 2,
                expected_bytes: 5,
            }
            .to_string(),
            "Header says there should be 5 bytes for the weights but there are only 2 bytes in the stream",
        ),
        (
            HuffmanTableError::ExtraPadding { skipped_bits: 13 }.to_string(),
            "Padding at the end of the sequence_section was more than a byte long: 13 bits. Probably caused by data corruption",
        ),
        (
            HuffmanTableError::FSETableUsedTooManyBytes {
                used: 7,
                available_bytes: 6,
            }
            .to_string(),
            "FSE table used more bytes: 7 than were meant to be used for the whole stream of huffman weights (6)",
        ),
    ];

    for (rendered, expected) in cases {
        assert_eq!(rendered, expected);
    }
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
Loading
Loading