From 3c2f35837296bd4b3b751e0e1beb4229e8c41d56 Mon Sep 17 00:00:00 2001 From: mattsu Date: Wed, 10 Dec 2025 20:11:14 +0900 Subject: [PATCH 1/6] perf(base32): optimize read buffer allocation in fast encode/decode Refactor buffer creation from zero-initialized vectors to pre-allocated Vec with_capacity, using unsafe set_len to avoid unnecessary zeroing, improving performance without affecting correctness, as only initialized bytes from Read::read are accessed. --- src/uu/base32/src/base_common.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 108a287866f..9d2385f7768 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -552,7 +552,13 @@ pub mod fast_encode { let mut leftover_buffer = VecDeque::::new(); let mut encoded_buffer = VecDeque::::new(); - let mut read_buffer = vec![0u8; encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE)]; + let read_buffer_capacity = encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE); + let mut read_buffer = Vec::with_capacity(read_buffer_capacity); + // SAFETY: We immediately pass the whole slice to `Read::read`, which + // writes up to `read_buffer_capacity` bytes. We then only access the + // prefix `[0..read]` reported by `read`, so uninitialized tail bytes + // are never read. + unsafe { read_buffer.set_len(read_buffer_capacity) }; loop { let read = input @@ -788,7 +794,10 @@ pub mod fast_decode { let mut buffer = Vec::with_capacity(decode_in_chunks_of_size); let mut decoded_buffer = Vec::::new(); - let mut read_buffer = [0u8; DEFAULT_BUFFER_SIZE]; + let mut read_buffer = Vec::with_capacity(DEFAULT_BUFFER_SIZE); + // SAFETY: Same rationale as in `fast_encode_stream`; we only read the + // initialized prefix returned by `Read::read`. 
+ unsafe { read_buffer.set_len(DEFAULT_BUFFER_SIZE) }; loop { let read = input From 9c11035d0d434a33ab070d829beca470a8563e45 Mon Sep 17 00:00:00 2001 From: mattsu Date: Wed, 10 Dec 2025 21:22:18 +0900 Subject: [PATCH 2/6] refactor: use MaybeUninit for safer buffer handling in base32 encode/decode Replaced manual unsafe `set_len` calls and direct reads into uninitialized vectors with `MaybeUninit::slice_assume_init_mut` to prevent potential memory safety issues and improve code reliability in `fast_encode` and `fast_decode` modules. Added buffer clearing to ensure proper reuse. --- src/uu/base32/src/base_common.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 9d2385f7768..865c0315b62 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -301,6 +301,7 @@ pub mod fast_encode { use std::{ cmp::min, collections::VecDeque, + mem::MaybeUninit, io::{self, Read, Write}, num::NonZeroUsize, }; @@ -553,22 +554,22 @@ pub mod fast_encode { let mut encoded_buffer = VecDeque::::new(); let read_buffer_capacity = encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE); - let mut read_buffer = Vec::with_capacity(read_buffer_capacity); - // SAFETY: We immediately pass the whole slice to `Read::read`, which - // writes up to `read_buffer_capacity` bytes. We then only access the - // prefix `[0..read]` reported by `read`, so uninitialized tail bytes - // are never read. - unsafe { read_buffer.set_len(read_buffer_capacity) }; + let mut read_buffer = Vec::::with_capacity(read_buffer_capacity); loop { + let spare = read_buffer.spare_capacity_mut(); let read = input - .read(&mut read_buffer) + .read(unsafe { MaybeUninit::slice_assume_init_mut(spare) }) .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; if read == 0 { break; } + // SAFETY: `read` bytes have just been initialized by `read`. 
+ unsafe { read_buffer.set_len(read_buffer.len() + read) }; + leftover_buffer.extend(&read_buffer[..read]); + read_buffer.clear(); while leftover_buffer.len() >= encode_in_chunks_of_size { { @@ -612,6 +613,7 @@ pub mod fast_encode { pub mod fast_decode { use crate::base_common::DEFAULT_BUFFER_SIZE; use std::io::{self, Read, Write}; + use std::mem::MaybeUninit; use uucore::{ encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, @@ -794,19 +796,19 @@ pub mod fast_decode { let mut buffer = Vec::with_capacity(decode_in_chunks_of_size); let mut decoded_buffer = Vec::::new(); - let mut read_buffer = Vec::with_capacity(DEFAULT_BUFFER_SIZE); - // SAFETY: Same rationale as in `fast_encode_stream`; we only read the - // initialized prefix returned by `Read::read`. - unsafe { read_buffer.set_len(DEFAULT_BUFFER_SIZE) }; + let mut read_buffer = Vec::::with_capacity(DEFAULT_BUFFER_SIZE); loop { + let spare = read_buffer.spare_capacity_mut(); let read = input - .read(&mut read_buffer) + .read(unsafe { MaybeUninit::slice_assume_init_mut(spare) }) .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; if read == 0 { break; } + unsafe { read_buffer.set_len(read_buffer.len() + read) }; + for &byte in &read_buffer[..read] { if byte == b'\n' || byte == b'\r' { continue; @@ -859,6 +861,8 @@ pub mod fast_decode { buffer.clear(); } } + + read_buffer.clear(); } if supports_partial_decode { From 46efaa5446d683f2c47c9db7a9da2a9109573864 Mon Sep 17 00:00:00 2001 From: mattsu Date: Wed, 10 Dec 2025 21:37:54 +0900 Subject: [PATCH 3/6] refactor(base32): replace MaybeUninit::slice_assume_init_mut with slice::from_raw_parts_mut Replace unsafe usage of `MaybeUninit::slice_assume_init_mut` with `slice::from_raw_parts_mut` in the fast_encode and fast_decode modules for reading data into the spare capacity of buffers. 
This change maintains safety guarantees through updated comments while potentially improving code clarity and performance by avoiding MaybeUninit initialization assumptions. The modification ensures the buffer's uninitialized tail is correctly handled as raw bytes during I/O operations. --- src/uu/base32/src/base_common.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 865c0315b62..ff0b01820e0 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -301,7 +301,7 @@ pub mod fast_encode { use std::{ cmp::min, collections::VecDeque, - mem::MaybeUninit, + slice, io::{self, Read, Write}, num::NonZeroUsize, }; @@ -559,7 +559,12 @@ pub mod fast_encode { loop { let spare = read_buffer.spare_capacity_mut(); let read = input - .read(unsafe { MaybeUninit::slice_assume_init_mut(spare) }) + .read(unsafe { + // SAFETY: `spare` points to uninitialized capacity of `read_buffer`. + // We transmute it to `[u8]` for the read call; only the first `read` + // bytes become initialized and we set_len accordingly below. + slice::from_raw_parts_mut(spare.as_mut_ptr() as *mut u8, spare.len()) + }) .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; if read == 0 { break; @@ -613,7 +618,7 @@ pub mod fast_encode { pub mod fast_decode { use crate::base_common::DEFAULT_BUFFER_SIZE; use std::io::{self, Read, Write}; - use std::mem::MaybeUninit; + use std::slice; use uucore::{ encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, @@ -801,7 +806,11 @@ pub mod fast_decode { loop { let spare = read_buffer.spare_capacity_mut(); let read = input - .read(unsafe { MaybeUninit::slice_assume_init_mut(spare) }) + .read(unsafe { + // SAFETY: `spare` is the uninitialized tail of `read_buffer`; we + // transmute it to `[u8]` for reading, then set the initialized len. 
+ slice::from_raw_parts_mut(spare.as_mut_ptr() as *mut u8, spare.len()) + }) .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; if read == 0 { break; From 5db1fe0d30ce9e57e62048b5f05f07552e2477e2 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 11 Dec 2025 18:44:14 +0900 Subject: [PATCH 4/6] refactor(base32): reorder std imports in base_common.rs for consistency Moved the `slice` import from after `collections::VecDeque` to after `num::NonZeroUsize` to better align with the module's import grouping style. --- src/uu/base32/src/base_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index ff0b01820e0..86202cc17a2 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -301,9 +301,9 @@ pub mod fast_encode { use std::{ cmp::min, collections::VecDeque, - slice, io::{self, Read, Write}, num::NonZeroUsize, + slice, }; use uucore::{ encoding::SupportsFastDecodeAndEncode, From dbeacf7c7d3cb588c3d2d95afe7676016e21056c Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 13 Dec 2025 23:12:55 +0900 Subject: [PATCH 5/6] refactor(base32): remove unsafe buffer handling in encode/decode Replace unsafe spare_capacity_mut and from_raw_parts_mut usage with safe Vec initialization and direct read calls in fast_encode and fast_decode. This eliminates potential safety risks while preserving buffer functionality. 
--- src/uu/base32/src/base_common.rs | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index bdfa4d72cc8..81b14a1ea2a 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -298,7 +298,6 @@ pub mod fast_encode { collections::VecDeque, io::{self, Read, Write}, num::NonZeroUsize, - slice, }; use uucore::{ encoding::SupportsFastDecodeAndEncode, @@ -549,27 +548,17 @@ pub mod fast_encode { let mut encoded_buffer = VecDeque::::new(); let read_buffer_capacity = encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE); - let mut read_buffer = Vec::::with_capacity(read_buffer_capacity); + let mut read_buffer = vec![0u8; read_buffer_capacity]; loop { - let spare = read_buffer.spare_capacity_mut(); let read = input - .read(unsafe { - // SAFETY: `spare` points to uninitialized capacity of `read_buffer`. - // We transmute it to `[u8]` for the read call; only the first `read` - // bytes become initialized and we set_len accordingly below. - slice::from_raw_parts_mut(spare.as_mut_ptr() as *mut u8, spare.len()) - }) + .read(&mut read_buffer) .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; if read == 0 { break; } - // SAFETY: `read` bytes have just been initialized by `read`. 
- unsafe { read_buffer.set_len(read_buffer.len() + read) }; - leftover_buffer.extend(&read_buffer[..read]); - read_buffer.clear(); while leftover_buffer.len() >= encode_in_chunks_of_size { { @@ -613,7 +602,6 @@ pub mod fast_encode { pub mod fast_decode { use crate::base_common::DEFAULT_BUFFER_SIZE; use std::io::{self, Read, Write}; - use std::slice; use uucore::{ encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, @@ -796,23 +784,16 @@ pub mod fast_decode { let mut buffer = Vec::with_capacity(decode_in_chunks_of_size); let mut decoded_buffer = Vec::::new(); - let mut read_buffer = Vec::::with_capacity(DEFAULT_BUFFER_SIZE); + let mut read_buffer = vec![0u8; DEFAULT_BUFFER_SIZE]; loop { - let spare = read_buffer.spare_capacity_mut(); let read = input - .read(unsafe { - // SAFETY: `spare` is the uninitialized tail of `read_buffer`; we - // transmute it to `[u8]` for reading, then set the initialized len. - slice::from_raw_parts_mut(spare.as_mut_ptr() as *mut u8, spare.len()) - }) + .read(&mut read_buffer) .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; if read == 0 { break; } - unsafe { read_buffer.set_len(read_buffer.len() + read) }; - for &byte in &read_buffer[..read] { if byte == b'\n' || byte == b'\r' { continue; @@ -865,8 +846,6 @@ pub mod fast_decode { buffer.clear(); } } - - read_buffer.clear(); } if supports_partial_decode { From 27b72b41d8fe7bf53b39623f9e88f55073b0803c Mon Sep 17 00:00:00 2001 From: mattsu Date: Wed, 17 Dec 2025 18:33:05 +0900 Subject: [PATCH 6/6] perf(base32): optimize input handling by switching to BufRead for efficient buffering Switch from unbuffered Read to BufRead in get_input, handle_input, and fast_encode_stream functions. This reduces syscalls by leveraging buffered reads, improving performance for base32 encoding/decoding operations. Refactor fast_encode_stream to use fill_buf() and manage leftover buffers more efficiently. 
--- src/uu/base32/src/base_common.rs | 117 ++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 81b14a1ea2a..d7f7a9ce9f4 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -8,7 +8,7 @@ use clap::{Arg, ArgAction, Command}; use std::ffi::OsString; use std::fs::File; -use std::io::{self, BufReader, ErrorKind, Read, Write}; +use std::io::{self, BufRead, BufReader, ErrorKind, Write}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::encoding::{ @@ -146,20 +146,26 @@ pub fn base_app(about: String, usage: String) -> Command { ) } -pub fn get_input(config: &Config) -> UResult> { +pub fn get_input(config: &Config) -> UResult> { match &config.to_read { Some(path_buf) => { let file = File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?; - Ok(Box::new(BufReader::new(file))) + Ok(Box::new(BufReader::with_capacity( + DEFAULT_BUFFER_SIZE, + file, + ))) } None => { // Stdin is already buffered by the OS; wrap once more to reduce syscalls per read. - Ok(Box::new(BufReader::new(io::stdin()))) + Ok(Box::new(BufReader::with_capacity( + DEFAULT_BUFFER_SIZE, + io::stdin(), + ))) } } } -pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { +pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { // Always allow padding for Base64 to avoid a full pre-scan of the input. 
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format, config.decode, true); @@ -292,11 +298,11 @@ pub fn get_supports_fast_decode_and_encode( } pub mod fast_encode { - use crate::base_common::{DEFAULT_BUFFER_SIZE, WRAP_DEFAULT}; + use crate::base_common::WRAP_DEFAULT; use std::{ cmp::min, collections::VecDeque, - io::{self, Read, Write}, + io::{self, BufRead, Write}, num::NonZeroUsize, }; use uucore::{ @@ -519,7 +525,7 @@ pub mod fast_encode { /// Remaining bytes are encoded and flushed at the end. I/O or encoding /// failures are propagated via `UResult`. pub fn fast_encode_stream( - input: &mut dyn Read, + input: &mut dyn BufRead, output: &mut dyn Write, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, wrap: Option, @@ -544,48 +550,79 @@ pub mod fast_encode { }; // Buffers - let mut leftover_buffer = VecDeque::::new(); let mut encoded_buffer = VecDeque::::new(); - - let read_buffer_capacity = encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE); - let mut read_buffer = vec![0u8; read_buffer_capacity]; + let mut leftover_buffer = Vec::::with_capacity(encode_in_chunks_of_size); loop { - let read = input - .read(&mut read_buffer) + let read_buffer = input + .fill_buf() .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; - if read == 0 { + if read_buffer.is_empty() { break; } - leftover_buffer.extend(&read_buffer[..read]); + let mut consumed = 0; - while leftover_buffer.len() >= encode_in_chunks_of_size { - { - let contiguous = leftover_buffer.make_contiguous(); + if !leftover_buffer.is_empty() { + let needed = encode_in_chunks_of_size - leftover_buffer.len(); + let take = needed.min(read_buffer.len()); + leftover_buffer.extend_from_slice(&read_buffer[..take]); + consumed += take; + + if leftover_buffer.len() == encode_in_chunks_of_size { encode_in_chunks_to_buffer( supports_fast_decode_and_encode, - &contiguous[..encode_in_chunks_of_size], + leftover_buffer.as_slice(), + &mut encoded_buffer, + )?; + 
leftover_buffer.clear(); + + write_to_output( + &mut line_wrapping, &mut encoded_buffer, + output, + false, + wrap == Some(0), )?; } + } - // Drop the data we just encoded - leftover_buffer.drain(..encode_in_chunks_of_size); + let remaining = &read_buffer[consumed..]; + let full_chunk_bytes = + (remaining.len() / encode_in_chunks_of_size) * encode_in_chunks_of_size; - write_to_output( - &mut line_wrapping, - &mut encoded_buffer, - output, - false, - wrap == Some(0), - )?; + if full_chunk_bytes > 0 { + for chunk in remaining[..full_chunk_bytes].chunks_exact(encode_in_chunks_of_size) { + encode_in_chunks_to_buffer( + supports_fast_decode_and_encode, + chunk, + &mut encoded_buffer, + )?; + write_to_output( + &mut line_wrapping, + &mut encoded_buffer, + output, + false, + wrap == Some(0), + )?; + } + consumed += full_chunk_bytes; + } + + if consumed < read_buffer.len() { + leftover_buffer.extend_from_slice(&read_buffer[consumed..]); + consumed = read_buffer.len(); } + + input.consume(consumed); + + // `leftover_buffer` should never exceed one partial chunk. 
+ debug_assert!(leftover_buffer.len() < encode_in_chunks_of_size); } // Encode any remaining bytes and flush supports_fast_decode_and_encode - .encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?; + .encode_to_vec_deque(&leftover_buffer, &mut encoded_buffer)?; write_to_output( &mut line_wrapping, @@ -600,8 +637,7 @@ pub mod fast_encode { } pub mod fast_decode { - use crate::base_common::DEFAULT_BUFFER_SIZE; - use std::io::{self, Read, Write}; + use std::io::{self, BufRead, Write}; use uucore::{ encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, @@ -631,7 +667,6 @@ pub mod fast_decode { fn write_to_output(decoded_buffer: &mut Vec, output: &mut dyn Write) -> io::Result<()> { // Write all data in `decoded_buffer` to `output` output.write_all(decoded_buffer.as_slice())?; - output.flush()?; decoded_buffer.clear(); @@ -765,7 +800,7 @@ pub mod fast_decode { } pub fn fast_decode_stream( - input: &mut dyn Read, + input: &mut dyn BufRead, output: &mut dyn Write, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, ignore_garbage: bool, @@ -784,17 +819,17 @@ pub mod fast_decode { let mut buffer = Vec::with_capacity(decode_in_chunks_of_size); let mut decoded_buffer = Vec::::new(); - let mut read_buffer = vec![0u8; DEFAULT_BUFFER_SIZE]; loop { - let read = input - .read(&mut read_buffer) + let read_buffer = input + .fill_buf() .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; - if read == 0 { + let read_len = read_buffer.len(); + if read_len == 0 { break; } - for &byte in &read_buffer[..read] { + for &byte in read_buffer { if byte == b'\n' || byte == b'\r' { continue; } @@ -846,6 +881,8 @@ pub mod fast_decode { buffer.clear(); } } + + input.consume(read_len); } if supports_partial_decode { @@ -903,7 +940,7 @@ fn format_read_error(kind: ErrorKind) -> String { /// Determines if the input buffer contains any padding ('=') ignoring trailing whitespace. 
#[cfg(test)]
-fn read_and_has_padding<R: Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
+fn read_and_has_padding<R: BufRead>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
     let mut buf = Vec::new();
     input
         .read_to_end(&mut buf)