Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions zstd/src/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@ pub const MAX_WINDOW_SIZE: u64 = (1 << 41) + 7 * (1 << 38);
///
/// <https://github.com/facebook/zstd/blob/eca205fc7849a61ab287492931a04960ac58e031/doc/educational_decoder/zstd_decompress.c#L28-L29>
pub const MAX_BLOCK_SIZE: u32 = 128 * 1024;

/// Largest window size, in bytes, that this implementation accepts: 100 MiB.
///
/// This is an implementation-defined cap, not a format limit: the zstd
/// format permits considerably larger windows. Capping the value keeps a
/// malformed or hostile frame header from requesting an excessive memory
/// allocation when decoding untrusted input.
pub const MAXIMUM_ALLOWED_WINDOW_SIZE: u64 = 100 * 1024 * 1024;
12 changes: 11 additions & 1 deletion zstd/src/decoding/block_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,16 @@ pub fn new() -> BlockDecoder {
}

impl BlockDecoder {
/// Decode the body of a single block described by `header` from `source` into `workspace`.
///
/// Returns the number of bytes consumed from `source`.
/// The decode buffer inside `workspace` may be reserved or grown during
/// decoding. For some block types the decompressed size is known up front,
/// but this is not guaranteed before any data is written.
pub fn decode_block_content(
&mut self,
header: &BlockHeader,
workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
workspace: &mut DecoderScratch,
mut source: impl Read,
) -> Result<u64, DecodeBlockContentError> {
match self.internal_state {
Expand All @@ -58,6 +64,8 @@ impl BlockDecoder {
let full_reads = header.decompressed_size / BATCH_SIZE as u32;
let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

workspace.buffer.reserve(header.decompressed_size as usize);

source.read_exact(&mut buf[0..1]).map_err(|err| {
DecodeBlockContentError::ReadError {
step: block_type,
Expand All @@ -84,6 +92,8 @@ impl BlockDecoder {
let full_reads = header.decompressed_size / BATCH_SIZE as u32;
let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

workspace.buffer.reserve(header.decompressed_size as usize);

for _ in 0..full_reads {
source.read_exact(&mut buf[..]).map_err(|err| {
DecodeBlockContentError::ReadError {
Expand Down
9 changes: 9 additions & 0 deletions zstd/src/decoding/decode_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ impl DecodeBuffer {
self.buffer.len()
}

/// Pre-allocate capacity for `amount` additional bytes.
///
/// Call this before a batch of `push`/`repeat` operations to avoid
/// repeated re-allocations inside the hot decode loop.
///
/// This is a thin forwarding wrapper: the request is passed unchanged to
/// the underlying buffer's own `reserve`, so the exact growth policy is
/// whatever that buffer type implements.
#[inline]
pub fn reserve(&mut self, amount: usize) {
self.buffer.reserve(amount);
}
Comment thread
polaz marked this conversation as resolved.

pub fn push(&mut self, data: &[u8]) {
self.buffer.extend(data);
self.total_output_counter += data.len() as u64;
Expand Down
4 changes: 2 additions & 2 deletions zstd/src/decoding/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,9 +531,9 @@ impl core::fmt::Display for FrameDecoderError {
FrameDecoderError::WindowSizeTooBig { requested } => {
write!(
f,
"Specified window_size is too big; Requested: {}, Max: {}",
"Specified window_size is too big; Requested: {}, Allowed: {}",
requested,
crate::common::MAX_WINDOW_SIZE,
crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE,
)
Comment thread
polaz marked this conversation as resolved.
}
FrameDecoderError::DictionaryDecodeError(e) => {
Expand Down
24 changes: 20 additions & 4 deletions zstd/src/decoding/frame_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ use alloc::collections::BTreeMap;
use alloc::vec::Vec;
use core::convert::TryInto;

/// While the maximum window size allowed by the spec is significantly larger,
/// our implementation limits it to 100 MiB to protect against malformed frames.
const MAXIMUM_ALLOWED_WINDOW_SIZE: u64 = 1024 * 1024 * 100;
use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;

/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
///
Expand Down Expand Up @@ -93,20 +91,38 @@ pub enum BlockDecodingStrategy {
}

impl FrameDecoderState {
/// Read the frame header from `source` and create a new decoder state.
///
/// Pre-allocates the decode buffer to `window_size` so the first block
/// does not trigger incremental growth from zero capacity.
pub fn new(source: impl Read) -> Result<FrameDecoderState, FrameDecoderError> {
let (frame, header_size) = frame::read_frame_header(source)?;
let window_size = frame.window_size()?;

if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
return Err(FrameDecoderError::WindowSizeTooBig {
requested: window_size,
});
}
Comment thread
polaz marked this conversation as resolved.

let mut decoder_scratch = DecoderScratch::new(window_size as usize);
decoder_scratch.buffer.reserve(window_size as usize);
Ok(FrameDecoderState {
frame_header: frame,
frame_finished: false,
block_counter: 0,
decoder_scratch: DecoderScratch::new(window_size as usize),
decoder_scratch,
bytes_read_counter: u64::from(header_size),
check_sum: None,
using_dict: None,
})
}

/// Reset this state for a new frame read from `source`, reusing existing allocations.
///
/// `DecodeBuffer::reset` reserves `window_size` internally, so no
/// additional frame-level reservation is needed here. Further buffer
/// growth during decoding is performed on demand by the active block path.
pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
let (frame_header, header_size) = frame::read_frame_header(source)?;
let window_size = frame_header.window_size()?;
Expand Down
7 changes: 7 additions & 0 deletions zstd/src/decoding/sequence_execution.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::prefetch;
use super::scratch::DecoderScratch;
use crate::common::MAX_BLOCK_SIZE;
use crate::decoding::errors::ExecuteSequencesError;

/// Take the provided decoder and execute the sequences stored within
Expand All @@ -8,6 +9,12 @@ pub fn execute_sequences(scratch: &mut DecoderScratch) -> Result<(), ExecuteSequ
let old_buffer_size = scratch.buffer.len();
let mut seq_sum = 0;

// Reserve once for the maximum possible decoded block output (128 KiB,
// `MAX_BLOCK_SIZE`, per the zstd spec). This avoids repeated re-allocations
// inside the hot execute loop without an extra scan over the sequence
// vector, and the reservation stays bounded even for corrupted inputs.
scratch.buffer.reserve(MAX_BLOCK_SIZE as usize);

for idx in 0..scratch.sequences.len() {
let seq = scratch.sequences[idx];

Expand Down
Loading