From a163dbc4bb1377005ff420aaf3c17dd70dfac141 Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Tue, 1 Mar 2022 20:02:28 -0700 Subject: [PATCH] base64ct: add linewrapping support to `Encoder` Adds support for buffered incremental Base64 encoding which inserts newline characters at a specified line width. This commit also moves the `LineEnding` type previously defined in `pem-rfc7468` into `base64ct`, and re-exports it. --- base64ct/src/decoder.rs | 10 +- base64ct/src/encoder.rs | 179 ++++++++++++++++++++++++++++++------ base64ct/src/lib.rs | 5 + base64ct/src/line_ending.rs | 53 +++++++++++ pem-rfc7468/src/encoder.rs | 49 +--------- pem-rfc7468/src/lib.rs | 3 +- 6 files changed, 216 insertions(+), 83 deletions(-) create mode 100644 base64ct/src/line_ending.rs diff --git a/base64ct/src/decoder.rs b/base64ct/src/decoder.rs index 3a5cf34b7..3c0cadaba 100644 --- a/base64ct/src/decoder.rs +++ b/base64ct/src/decoder.rs @@ -2,9 +2,11 @@ use crate::{ encoding, + line_ending::{CHAR_CR, CHAR_LF}, variant::Variant, Encoding, Error::{self, InvalidLength}, + MIN_LINE_WIDTH, }; use core::{cmp, marker::PhantomData}; @@ -17,12 +19,6 @@ use std::io; #[cfg(docsrs)] use crate::{Base64, Base64Unpadded}; -/// Carriage return -const CHAR_CR: u8 = 0x0d; - -/// Line feed -const CHAR_LF: u8 = 0x0a; - /// Stateful Base64 decoder with support for buffered, incremental decoding. /// /// The `E` type parameter can be any type which impls [`Encoding`] such as @@ -420,7 +416,7 @@ impl<'i> LineReader<'i> { /// Create a new reader which operates over linewrapped data. 
fn new_wrapped(bytes: &'i [u8], line_width: usize) -> Result { - if line_width < 4 { + if line_width < MIN_LINE_WIDTH { return Err(InvalidLength); } diff --git a/base64ct/src/encoder.rs b/base64ct/src/encoder.rs index c8ef38bf3..4fc7f37c2 100644 --- a/base64ct/src/encoder.rs +++ b/base64ct/src/encoder.rs @@ -4,6 +4,7 @@ use crate::{ variant::Variant, Encoding, Error::{self, InvalidLength}, + LineEnding, MIN_LINE_WIDTH, }; use core::{cmp, marker::PhantomData, str}; @@ -29,13 +30,18 @@ pub struct Encoder<'o, E: Variant> { /// Block buffer used for non-block-aligned data. block_buffer: BlockBuffer, + /// Configuration and state for line-wrapping the output at a specified + /// column. + line_wrapper: Option, + /// Phantom parameter for the Base64 encoding in use. encoding: PhantomData, } impl<'o, E: Variant> Encoder<'o, E> { - /// Create a new decoder for a byte slice containing contiguous - /// (non-newline-delimited) Base64-encoded data. + /// Create a new encoder which writes output to the given byte slice. + /// + /// Output constructed using this method is not line-wrapped. pub fn new(output: &'o mut [u8]) -> Result { if output.is_empty() { return Err(InvalidLength); @@ -45,10 +51,29 @@ impl<'o, E: Variant> Encoder<'o, E> { output, position: 0, block_buffer: BlockBuffer::default(), + line_wrapper: None, encoding: PhantomData, }) } + /// Create a new encoder which writes line-wrapped output to the given byte + /// slice. + /// + /// Output will be wrapped at the specified interval, using the provided + /// line ending. Use [`LineEnding::default()`] to use the conventional line + /// ending for the target OS. + /// + /// Minimum allowed line width is 4. + pub fn new_wrapped( + output: &'o mut [u8], + width: usize, + ending: LineEnding, + ) -> Result { + let mut encoder = Self::new(output)?; + encoder.line_wrapper = Some(LineWrapper::new(width, ending)?); + Ok(encoder) + } + /// Encode the provided buffer as Base64, writing it to the output buffer. 
/// /// # Returns @@ -57,23 +82,30 @@ impl<'o, E: Variant> Encoder<'o, E> { pub fn encode(&mut self, mut input: &[u8]) -> Result<(), Error> { // If there's data in the block buffer, fill it if !self.block_buffer.is_empty() { - self.fill_block_buffer(&mut input)?; + self.process_buffer(&mut input)?; } - // Attempt to decode a stride of block-aligned data - let in_blocks = input.len() / 3; - let out_blocks = self.remaining().len() / 4; - let blocks = cmp::min(in_blocks, out_blocks); + while !input.is_empty() { + // Attempt to encode a stride of block-aligned data + let in_blocks = input.len() / 3; + let out_blocks = self.remaining().len() / 4; + let mut blocks = cmp::min(in_blocks, out_blocks); - if blocks > 0 { - let (in_aligned, in_rem) = input.split_at(blocks * 3); - input = in_rem; - self.perform_encode(in_aligned)?; - } + // When line wrapping, cap the block-aligned stride at near/at line length + if let Some(line_wrapper) = &self.line_wrapper { + line_wrapper.wrap_blocks(&mut blocks); + } + + if blocks > 0 { + let (in_aligned, in_rem) = input.split_at(blocks * 3); + input = in_rem; + self.perform_encode(in_aligned)?; + } - // If there's remaining non-aligned data, fill the block buffer - if !input.is_empty() { - self.fill_block_buffer(&mut input)?; + // If there's remaining non-aligned data, fill the block buffer + if !input.is_empty() { + self.process_buffer(&mut input)?; + } } Ok(()) @@ -97,7 +129,7 @@ impl<'o, E: Variant> Encoder<'o, E> { /// Fill the block buffer with data, consuming and encoding it when the /// buffer is full. - fn fill_block_buffer(&mut self, input: &mut &[u8]) -> Result<(), Error> { + fn process_buffer(&mut self, input: &mut &[u8]) -> Result<(), Error> { self.block_buffer.fill(input); if self.block_buffer.is_full() { @@ -110,8 +142,14 @@ impl<'o, E: Variant> Encoder<'o, E> { /// Perform Base64 encoding operation. 
fn perform_encode(&mut self, input: &[u8]) -> Result { - let len = E::encode(input, self.remaining())?.as_bytes().len(); - self.position += len; + let mut len = E::encode(input, self.remaining())?.as_bytes().len(); + + // Insert newline characters into the output as needed + if let Some(line_wrapper) = &mut self.line_wrapper { + line_wrapper.insert_newlines(&mut self.output[self.position..], &mut len)?; + } + + self.position = self.position.checked_add(len).ok_or(InvalidLength)?; Ok(len) } } @@ -160,28 +198,115 @@ impl BlockBuffer { } } +/// Helper for wrapping Base64 at a given line width. +#[derive(Debug)] +struct LineWrapper { + /// Number of bytes remaining in the current line. + remaining: usize, + + /// Column at which Base64 should be wrapped. + width: usize, + + /// Newline characters to use at the end of each line. + ending: LineEnding, +} + +impl LineWrapper { + /// Create a new linewrapper. + fn new(width: usize, ending: LineEnding) -> Result { + if width < MIN_LINE_WIDTH { + return Err(InvalidLength); + } + + Ok(Self { + remaining: width, + width, + ending, + }) + } + + /// Wrap the number of blocks to encode near/at EOL. + fn wrap_blocks(&self, blocks: &mut usize) { + if (*blocks * 4) >= self.remaining { + *blocks = self.remaining / 4; + } + } + + /// Insert newlines into the output buffer as needed. 
+ fn insert_newlines(&mut self, mut buffer: &mut [u8], len: &mut usize) -> Result<(), Error> { + let mut buffer_len = *len; + + if buffer_len < self.remaining { + self.remaining = self + .remaining + .checked_sub(buffer_len) + .ok_or(InvalidLength)?; + + return Ok(()); + } + + buffer = &mut buffer[self.remaining..]; + buffer_len = buffer_len + .checked_sub(self.remaining) + .ok_or(InvalidLength)?; + + // The `wrap_blocks` function should ensure the buffer is smaller than a Base64 block + debug_assert!(buffer_len < 4, "buffer exceeds 4-bytes"); + + if buffer_len + self.ending.len() >= buffer.len() { + // Not enough space in buffer to add newlines + return Err(InvalidLength); + } + + // Shift the buffer contents to make space for the line ending + for i in (0..buffer_len).rev() { + buffer[i + self.ending.len()] = buffer[i]; + } + + buffer[..self.ending.len()].copy_from_slice(self.ending.as_bytes()); + *len = (*len).checked_add(self.ending.len()).ok_or(InvalidLength)?; + self.remaining = self.width.checked_sub(buffer_len).ok_or(InvalidLength)?; + + Ok(()) + } +} + #[cfg(test)] mod tests { - use crate::{test_vectors::*, variant::Variant, Base64, Base64Unpadded, Encoder}; + use crate::{test_vectors::*, variant::Variant, Base64, Base64Unpadded, Encoder, LineEnding}; #[test] fn encode_padded() { - encode_test::(PADDED_BIN, PADDED_BASE64); + encode_test::(PADDED_BIN, PADDED_BASE64, None); } #[test] fn encode_unpadded() { - encode_test::(UNPADDED_BIN, UNPADDED_BASE64); + encode_test::(UNPADDED_BIN, UNPADDED_BASE64, None); + } + + #[test] + fn encode_multiline_padded() { + encode_test::(MULTILINE_PADDED_BIN, MULTILINE_PADDED_BASE64, Some(70)); + } + + #[test] + fn encode_multiline_unpadded() { + encode_test::(MULTILINE_UNPADDED_BIN, MULTILINE_UNPADDED_BASE64, Some(70)); } /// Core functionality of an encoding test. 
- fn encode_test(input: &[u8], expected: &str) - where - V: Variant, - { + fn encode_test(input: &[u8], expected: &str, wrapped: Option) { + let mut buffer = [0u8; 1024]; + for chunk_size in 1..input.len() { - let mut buffer = [0u8; 1024]; - let mut encoder = Encoder::::new(&mut buffer).unwrap(); + let mut encoder = match wrapped { + Some(line_width) => { + Encoder::::new_wrapped(&mut buffer, line_width, LineEnding::LF) + } + None => Encoder::::new(&mut buffer), + } + .unwrap(); for chunk in input.chunks(chunk_size) { encoder.encode(chunk).unwrap(); diff --git a/base64ct/src/lib.rs b/base64ct/src/lib.rs index b0ebe6b41..775bacb70 100644 --- a/base64ct/src/lib.rs +++ b/base64ct/src/lib.rs @@ -75,6 +75,7 @@ mod decoder; mod encoder; mod encoding; mod errors; +mod line_ending; mod variant; #[cfg(test)] @@ -85,6 +86,7 @@ pub use crate::{ encoder::Encoder, encoding::Encoding, errors::{Error, InvalidEncodingError, InvalidLengthError}, + line_ending::LineEnding, variant::{ bcrypt::Base64Bcrypt, crypt::Base64Crypt, @@ -92,3 +94,6 @@ pub use crate::{ url::{Base64Url, Base64UrlUnpadded}, }, }; + +/// Minimum supported line width. +const MIN_LINE_WIDTH: usize = 4; diff --git a/base64ct/src/line_ending.rs b/base64ct/src/line_ending.rs new file mode 100644 index 000000000..dfb168eab --- /dev/null +++ b/base64ct/src/line_ending.rs @@ -0,0 +1,53 @@ +//! Line endings. + +/// Carriage return +pub(crate) const CHAR_CR: u8 = 0x0d; + +/// Line feed +pub(crate) const CHAR_LF: u8 = 0x0a; + +/// Line endings: variants of newline characters that can be used with Base64. +/// +/// Use [`LineEnding::default`] to get an appropriate line ending for the +/// current operating system. 
+#[allow(clippy::upper_case_acronyms)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +pub enum LineEnding { + /// Carriage return: `\r` (Pre-OS X Macintosh) + CR, + + /// Line feed: `\n` (Unix OSes) + LF, + + /// Carriage return + line feed: `\r\n` (Windows) + CRLF, +} + +impl Default for LineEnding { + // Default line ending matches conventions for target OS + #[cfg(windows)] + fn default() -> LineEnding { + LineEnding::CRLF + } + #[cfg(not(windows))] + fn default() -> LineEnding { + LineEnding::LF + } +} + +#[allow(clippy::len_without_is_empty)] +impl LineEnding { + /// Get the byte serialization of this [`LineEnding`]. + pub fn as_bytes(self) -> &'static [u8] { + match self { + LineEnding::CR => &[CHAR_CR], + LineEnding::LF => &[CHAR_LF], + LineEnding::CRLF => &[CHAR_CR, CHAR_LF], + } + } + + /// Get the encoded length of this [`LineEnding`]. + pub fn len(self) -> usize { + self.as_bytes().len() + } +} diff --git a/pem-rfc7468/src/encoder.rs b/pem-rfc7468/src/encoder.rs index 6fe30503d..768ea02a2 100644 --- a/pem-rfc7468/src/encoder.rs +++ b/pem-rfc7468/src/encoder.rs @@ -1,8 +1,7 @@ //! PEM encoder. use crate::{ - grammar::{self, CHAR_CR, CHAR_LF}, - Error, Result, BASE64_WRAP_WIDTH, ENCAPSULATION_BOUNDARY_DELIMITER, + grammar, Error, LineEnding, Result, BASE64_WRAP_WIDTH, ENCAPSULATION_BOUNDARY_DELIMITER, POST_ENCAPSULATION_BOUNDARY, PRE_ENCAPSULATION_BOUNDARY, }; use base64ct::{Base64, Encoding}; @@ -62,52 +61,6 @@ pub fn encode_string(label: &str, line_ending: LineEnding, input: &[u8]) -> Resu String::from_utf8(buf).map_err(|_| Error::CharacterEncoding) } -/// Line endings. -/// -/// Use [`LineEnding::default`] to get an appropriate line ending for the -/// current operating system. 
-#[allow(clippy::upper_case_acronyms)] -#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] -pub enum LineEnding { - /// Carriage return: `\r` (Pre-OS X Macintosh) - CR, - - /// Line feed: `\n` (Unix OSes) - LF, - - /// Carriage return + line feed: `\r\n` (Windows) - CRLF, -} - -impl Default for LineEnding { - /// Use the line ending for the current OS - #[cfg(windows)] - fn default() -> LineEnding { - LineEnding::CRLF - } - #[cfg(not(windows))] - fn default() -> LineEnding { - LineEnding::LF - } -} - -#[allow(clippy::len_without_is_empty)] -impl LineEnding { - /// Get the byte serialization of this [`LineEnding`]. - pub fn as_bytes(self) -> &'static [u8] { - match self { - LineEnding::CR => &[CHAR_CR], - LineEnding::LF => &[CHAR_LF], - LineEnding::CRLF => &[CHAR_CR, CHAR_LF], - } - } - - /// Get the encoded length of this [`LineEnding`]. - pub fn len(self) -> usize { - self.as_bytes().len() - } -} - /// Output buffer for writing encoded PEM output. struct Buffer<'a> { /// Backing byte slice where PEM output is being written. diff --git a/pem-rfc7468/src/lib.rs b/pem-rfc7468/src/lib.rs index fd9e4c803..bf8d8a54f 100644 --- a/pem-rfc7468/src/lib.rs +++ b/pem-rfc7468/src/lib.rs @@ -63,9 +63,10 @@ mod grammar; pub use crate::{ decoder::{decode, decode_label, Decoder}, - encoder::{encode, encoded_len, LineEnding}, + encoder::{encode, encoded_len}, error::{Error, Result}, }; +pub use base64ct::LineEnding; #[cfg(feature = "alloc")] pub use crate::{decoder::decode_vec, encoder::encode_string};