From a163dbc4bb1377005ff420aaf3c17dd70dfac141 Mon Sep 17 00:00:00 2001
From: Tony Arcieri <bascule@gmail.com>
Date: Tue, 1 Mar 2022 20:02:28 -0700
Subject: [PATCH] base64ct: add linewrapping support to `Encoder`

Adds support for buffered incremental Base64 encoding which inserts
newline characters at a specified line width.

This commit also moves the `LineEnding` type previously defined in
`pem-rfc7468` into `base64ct`, re-exporting it from `pem-rfc7468`.
---
 base64ct/src/decoder.rs     |  10 +-
 base64ct/src/encoder.rs     | 179 ++++++++++++++++++++++++++++++------
 base64ct/src/lib.rs         |   5 +
 base64ct/src/line_ending.rs |  53 +++++++++++
 pem-rfc7468/src/encoder.rs  |  49 +---------
 pem-rfc7468/src/lib.rs      |   3 +-
 6 files changed, 216 insertions(+), 83 deletions(-)
 create mode 100644 base64ct/src/line_ending.rs
diff --git a/base64ct/src/decoder.rs b/base64ct/src/decoder.rs
index 3a5cf34b7..3c0cadaba 100644
--- a/base64ct/src/decoder.rs
+++ b/base64ct/src/decoder.rs
@@ -2,9 +2,11 @@
 
 use crate::{
     encoding,
+    line_ending::{CHAR_CR, CHAR_LF},
     variant::Variant,
     Encoding,
     Error::{self, InvalidLength},
+    MIN_LINE_WIDTH,
 };
 use core::{cmp, marker::PhantomData};
 
@@ -17,12 +19,6 @@ use std::io;
 #[cfg(docsrs)]
 use crate::{Base64, Base64Unpadded};
 
-/// Carriage return
-const CHAR_CR: u8 = 0x0d;
-
-/// Line feed
-const CHAR_LF: u8 = 0x0a;
-
 /// Stateful Base64 decoder with support for buffered, incremental decoding.
 ///
 /// The `E` type parameter can be any type which impls [`Encoding`] such as
@@ -420,7 +416,7 @@ impl<'i> LineReader<'i> {
 
     /// Create a new reader which operates over linewrapped data.
     fn new_wrapped(bytes: &'i [u8], line_width: usize) -> Result<Self, Error> {
-        if line_width < 4 {
+        if line_width < MIN_LINE_WIDTH {
             return Err(InvalidLength);
         }
 
diff --git a/base64ct/src/encoder.rs b/base64ct/src/encoder.rs
index c8ef38bf3..4fc7f37c2 100644
--- a/base64ct/src/encoder.rs
+++ b/base64ct/src/encoder.rs
@@ -4,6 +4,7 @@ use crate::{
     variant::Variant,
     Encoding,
     Error::{self, InvalidLength},
+    LineEnding, MIN_LINE_WIDTH,
 };
 use core::{cmp, marker::PhantomData, str};
 
@@ -29,13 +30,18 @@ pub struct Encoder<'o, E: Variant> {
     /// Block buffer used for non-block-aligned data.
     block_buffer: BlockBuffer,
 
+    /// Configuration and state for line-wrapping the output at a specified
+    /// column.
+    line_wrapper: Option<LineWrapper>,
+
     /// Phantom parameter for the Base64 encoding in use.
     encoding: PhantomData<E>,
 }
 
 impl<'o, E: Variant> Encoder<'o, E> {
-    /// Create a new decoder for a byte slice containing contiguous
-    /// (non-newline-delimited) Base64-encoded data.
+    /// Create a new encoder which writes output to the given byte slice.
+    ///
+    /// Output constructed using this method is not line-wrapped.
     pub fn new(output: &'o mut [u8]) -> Result<Self, Error> {
         if output.is_empty() {
             return Err(InvalidLength);
@@ -45,10 +51,29 @@ impl<'o, E: Variant> Encoder<'o, E> {
             output,
             position: 0,
             block_buffer: BlockBuffer::default(),
+            line_wrapper: None,
             encoding: PhantomData,
         })
     }
 
+    /// Create a new encoder which writes line-wrapped output to the given byte
+    /// slice.
+    ///
+    /// Output will be wrapped at the specified `width`, using the provided line
+    /// `ending`. Use [`LineEnding::default()`] to use the conventional line
+    /// ending for the target OS.
+    ///
+    /// Returns `InvalidLength` if `output` is empty or `width` is less than 4.
+    pub fn new_wrapped(
+        output: &'o mut [u8],
+        width: usize,
+        ending: LineEnding,
+    ) -> Result<Self, Error> {
+        let mut encoder = Self::new(output)?;
+        encoder.line_wrapper = Some(LineWrapper::new(width, ending)?);
+        Ok(encoder)
+    }
+
     /// Encode the provided buffer as Base64, writing it to the output buffer.
     ///
     /// # Returns
@@ -57,23 +82,30 @@ impl<'o, E: Variant> Encoder<'o, E> {
     pub fn encode(&mut self, mut input: &[u8]) -> Result<(), Error> {
         // If there's data in the block buffer, fill it
         if !self.block_buffer.is_empty() {
-            self.fill_block_buffer(&mut input)?;
+            self.process_buffer(&mut input)?;
         }
 
-        // Attempt to decode a stride of block-aligned data
-        let in_blocks = input.len() / 3;
-        let out_blocks = self.remaining().len() / 4;
-        let blocks = cmp::min(in_blocks, out_blocks);
+        while !input.is_empty() {
+            // Attempt to encode a stride of block-aligned data
+            let in_blocks = input.len() / 3;
+            let out_blocks = self.remaining().len() / 4;
+            let mut blocks = cmp::min(in_blocks, out_blocks);
 
-        if blocks > 0 {
-            let (in_aligned, in_rem) = input.split_at(blocks * 3);
-            input = in_rem;
-            self.perform_encode(in_aligned)?;
-        }
+            // When line wrapping, cap the block-aligned stride at near/at line length
+            if let Some(line_wrapper) = &self.line_wrapper {
+                line_wrapper.wrap_blocks(&mut blocks);
+            }
+
+            if blocks > 0 {
+                let (in_aligned, in_rem) = input.split_at(blocks * 3);
+                input = in_rem;
+                self.perform_encode(in_aligned)?;
+            }
 
-        // If there's remaining non-aligned data, fill the block buffer
-        if !input.is_empty() {
-            self.fill_block_buffer(&mut input)?;
+            // If there's remaining non-aligned data, fill the block buffer
+            if !input.is_empty() {
+                self.process_buffer(&mut input)?;
+            }
         }
 
         Ok(())
@@ -97,7 +129,7 @@ impl<'o, E: Variant> Encoder<'o, E> {
 
     /// Fill the block buffer with data, consuming and encoding it when the
     /// buffer is full.
-    fn fill_block_buffer(&mut self, input: &mut &[u8]) -> Result<(), Error> {
+    fn process_buffer(&mut self, input: &mut &[u8]) -> Result<(), Error> {
         self.block_buffer.fill(input);
 
         if self.block_buffer.is_full() {
@@ -110,8 +142,14 @@ impl<'o, E: Variant> Encoder<'o, E> {
 
     /// Perform Base64 encoding operation.
     fn perform_encode(&mut self, input: &[u8]) -> Result<usize, Error> {
-        let len = E::encode(input, self.remaining())?.as_bytes().len();
-        self.position += len;
+        let mut len = E::encode(input, self.remaining())?.as_bytes().len();
+
+        // Insert newline characters into the output as needed
+        if let Some(line_wrapper) = &mut self.line_wrapper {
+            line_wrapper.insert_newlines(&mut self.output[self.position..], &mut len)?;
+        }
+
+        self.position = self.position.checked_add(len).ok_or(InvalidLength)?;
         Ok(len)
     }
 }
@@ -160,28 +198,115 @@ impl BlockBuffer {
     }
 }
 
+/// Helper for wrapping Base64 at a given line width.
+#[derive(Debug)]
+struct LineWrapper {
+    /// Number of output bytes that still fit on the current line.
+    remaining: usize,
+
+    /// Column at which Base64 should be wrapped, i.e. the length of a full line.
+    width: usize,
+
+    /// Newline characters to use at the end of each line.
+    ending: LineEnding,
+}
+
+impl LineWrapper {
+    /// Create a new linewrapper, returning `InvalidLength` when `width` is
+    /// smaller than `MIN_LINE_WIDTH`.
+    fn new(width: usize, ending: LineEnding) -> Result<Self, Error> {
+        if width < MIN_LINE_WIDTH {
+            return Err(InvalidLength);
+        }
+
+        Ok(Self {
+            remaining: width,
+            width,
+            ending,
+        })
+    }
+
+    /// Cap `blocks` so that a block-aligned stride of 4-character Base64 blocks
+    /// stops at or before the end of the current line.
+    fn wrap_blocks(&self, blocks: &mut usize) {
+        *blocks = (*blocks).min(self.remaining / 4);
+    }
+
+    /// Insert a line ending into `buffer` once the `len` bytes of Base64 at
+    /// its start reach the end of the current line, growing `len` to match.
+    /// Returns `InvalidLength` if `buffer` has no room for the line ending.
+    fn insert_newlines(&mut self, mut buffer: &mut [u8], len: &mut usize) -> Result<(), Error> {
+        let ending_len = self.ending.len();
+        let mut buffer_len = *len;
+
+        if buffer_len < self.remaining {
+            self.remaining = self
+                .remaining
+                .checked_sub(buffer_len)
+                .ok_or(InvalidLength)?;
+
+            return Ok(());
+        }
+
+        buffer = &mut buffer[self.remaining..];
+        buffer_len = buffer_len
+            .checked_sub(self.remaining)
+            .ok_or(InvalidLength)?;
+
+        // The `wrap_blocks` function should ensure the buffer is smaller than a Base64 block
+        debug_assert!(buffer_len < 4, "buffer exceeds 4-bytes");
+
+        // An exact fit is fine: only fail if the shifted data would overrun
+        if buffer_len.checked_add(ending_len).ok_or(InvalidLength)? > buffer.len() {
+            return Err(InvalidLength);
+        }
+
+        // Shift the overflow right, then write the line ending into the gap
+        buffer.copy_within(..buffer_len, ending_len);
+        buffer[..ending_len].copy_from_slice(self.ending.as_bytes());
+        *len = (*len).checked_add(ending_len).ok_or(InvalidLength)?;
+        self.remaining = self.width.checked_sub(buffer_len).ok_or(InvalidLength)?;
+
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use crate::{test_vectors::*, variant::Variant, Base64, Base64Unpadded, Encoder};
+    use crate::{test_vectors::*, variant::Variant, Base64, Base64Unpadded, Encoder, LineEnding};
 
     #[test]
     fn encode_padded() {
-        encode_test::<Base64>(PADDED_BIN, PADDED_BASE64);
+        encode_test::<Base64>(PADDED_BIN, PADDED_BASE64, None);
     }
 
     #[test]
     fn encode_unpadded() {
-        encode_test::<Base64Unpadded>(UNPADDED_BIN, UNPADDED_BASE64);
+        encode_test::<Base64Unpadded>(UNPADDED_BIN, UNPADDED_BASE64, None);
+    }
+
+    #[test]
+    fn encode_multiline_padded() {
+        encode_test::<Base64>(MULTILINE_PADDED_BIN, MULTILINE_PADDED_BASE64, Some(70));
+    }
+
+    #[test]
+    fn encode_multiline_unpadded() {
+        encode_test::<Base64Unpadded>(MULTILINE_UNPADDED_BIN, MULTILINE_UNPADDED_BASE64, Some(70));
     }
 
     /// Core functionality of an encoding test.
-    fn encode_test<V>(input: &[u8], expected: &str)
-    where
-        V: Variant,
-    {
+    fn encode_test<V: Variant>(input: &[u8], expected: &str, wrapped: Option<usize>) {
+        let mut buffer = [0u8; 1024];
+
         for chunk_size in 1..input.len() {
-            let mut buffer = [0u8; 1024];
-            let mut encoder = Encoder::<V>::new(&mut buffer).unwrap();
+            let mut encoder = match wrapped {
+                Some(line_width) => {
+                    Encoder::<V>::new_wrapped(&mut buffer, line_width, LineEnding::LF)
+                }
+                None => Encoder::<V>::new(&mut buffer),
+            }
+            .unwrap();
 
             for chunk in input.chunks(chunk_size) {
                 encoder.encode(chunk).unwrap();
diff --git a/base64ct/src/lib.rs b/base64ct/src/lib.rs
index b0ebe6b41..775bacb70 100644
--- a/base64ct/src/lib.rs
+++ b/base64ct/src/lib.rs
@@ -75,6 +75,7 @@ mod decoder;
 mod encoder;
 mod encoding;
 mod errors;
+mod line_ending;
 mod variant;
 
 #[cfg(test)]
@@ -85,6 +86,7 @@ pub use crate::{
     encoder::Encoder,
     encoding::Encoding,
     errors::{Error, InvalidEncodingError, InvalidLengthError},
+    line_ending::LineEnding,
     variant::{
         bcrypt::Base64Bcrypt,
         crypt::Base64Crypt,
@@ -92,3 +94,6 @@ pub use crate::{
         url::{Base64Url, Base64UrlUnpadded},
     },
 };
+
+/// Minimum supported line width.
+const MIN_LINE_WIDTH: usize = 4;
diff --git a/base64ct/src/line_ending.rs b/base64ct/src/line_ending.rs
new file mode 100644
index 000000000..dfb168eab
--- /dev/null
+++ b/base64ct/src/line_ending.rs
@@ -0,0 +1,53 @@
+//! Line endings.
+
+/// Carriage return
+pub(crate) const CHAR_CR: u8 = 0x0d;
+
+/// Line feed
+pub(crate) const CHAR_LF: u8 = 0x0a;
+
+/// Line endings: variants of newline characters that can be used with Base64.
+///
+/// [`LineEnding::default`] returns the conventional line ending of the target OS.
+// The variant names are acronyms, hence the upper-case spelling.
+#[allow(clippy::upper_case_acronyms)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
+pub enum LineEnding {
+    /// Carriage return: `\r` (Pre-OS X Macintosh)
+    CR,
+
+    /// Line feed: `\n` (Unix OSes)
+    LF,
+
+    /// Carriage return + line feed: `\r\n` (Windows)
+    CRLF,
+}
+
+impl Default for LineEnding {
+    /// Pick the line ending conventionally used on the target OS: `CRLF` when
+    /// building for Windows, `LF` everywhere else.
+    fn default() -> Self {
+        if cfg!(windows) {
+            Self::CRLF
+        } else {
+            Self::LF
+        }
+    }
+}
+
+#[allow(clippy::len_without_is_empty)]
+impl LineEnding {
+    /// Byte sequence emitted for this [`LineEnding`].
+    pub fn as_bytes(self) -> &'static [u8] {
+        match self {
+            Self::CRLF => &[CHAR_CR, CHAR_LF],
+            Self::LF => &[CHAR_LF],
+            Self::CR => &[CHAR_CR],
+        }
+    }
+
+    /// Number of bytes in the serialization of this [`LineEnding`].
+    pub fn len(self) -> usize {
+        Self::as_bytes(self).len()
+    }
+}
diff --git a/pem-rfc7468/src/encoder.rs b/pem-rfc7468/src/encoder.rs
index 6fe30503d..768ea02a2 100644
--- a/pem-rfc7468/src/encoder.rs
+++ b/pem-rfc7468/src/encoder.rs
@@ -1,8 +1,7 @@
 //! PEM encoder.
 
 use crate::{
-    grammar::{self, CHAR_CR, CHAR_LF},
-    Error, Result, BASE64_WRAP_WIDTH, ENCAPSULATION_BOUNDARY_DELIMITER,
+    grammar, Error, LineEnding, Result, BASE64_WRAP_WIDTH, ENCAPSULATION_BOUNDARY_DELIMITER,
     POST_ENCAPSULATION_BOUNDARY, PRE_ENCAPSULATION_BOUNDARY,
 };
 use base64ct::{Base64, Encoding};
@@ -62,52 +61,6 @@ pub fn encode_string(label: &str, line_ending: LineEnding, input: &[u8]) -> Resu
     String::from_utf8(buf).map_err(|_| Error::CharacterEncoding)
 }
 
-/// Line endings.
-///
-/// Use [`LineEnding::default`] to get an appropriate line ending for the
-/// current operating system.
-#[allow(clippy::upper_case_acronyms)]
-#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
-pub enum LineEnding {
-    /// Carriage return: `\r` (Pre-OS X Macintosh)
-    CR,
-
-    /// Line feed: `\n` (Unix OSes)
-    LF,
-
-    /// Carriage return + line feed: `\r\n` (Windows)
-    CRLF,
-}
-
-impl Default for LineEnding {
-    /// Use the line ending for the current OS
-    #[cfg(windows)]
-    fn default() -> LineEnding {
-        LineEnding::CRLF
-    }
-    #[cfg(not(windows))]
-    fn default() -> LineEnding {
-        LineEnding::LF
-    }
-}
-
-#[allow(clippy::len_without_is_empty)]
-impl LineEnding {
-    /// Get the byte serialization of this [`LineEnding`].
-    pub fn as_bytes(self) -> &'static [u8] {
-        match self {
-            LineEnding::CR => &[CHAR_CR],
-            LineEnding::LF => &[CHAR_LF],
-            LineEnding::CRLF => &[CHAR_CR, CHAR_LF],
-        }
-    }
-
-    /// Get the encoded length of this [`LineEnding`].
-    pub fn len(self) -> usize {
-        self.as_bytes().len()
-    }
-}
-
 /// Output buffer for writing encoded PEM output.
 struct Buffer<'a> {
     /// Backing byte slice where PEM output is being written.
diff --git a/pem-rfc7468/src/lib.rs b/pem-rfc7468/src/lib.rs
index fd9e4c803..bf8d8a54f 100644
--- a/pem-rfc7468/src/lib.rs
+++ b/pem-rfc7468/src/lib.rs
@@ -63,9 +63,10 @@ mod grammar;
 
 pub use crate::{
     decoder::{decode, decode_label, Decoder},
-    encoder::{encode, encoded_len, LineEnding},
+    encoder::{encode, encoded_len},
     error::{Error, Result},
 };
+pub use base64ct::LineEnding;
 
 #[cfg(feature = "alloc")]
 pub use crate::{decoder::decode_vec, encoder::encode_string};