-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
remove utf8 dependency. #4460
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
remove utf8 dependency. #4460
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
2e75372
remove utf8 crate dependency.
publicmatt 1abdadf
Merge branch 'main' into replace_utf8_crate
publicmatt 858ed8b
ignore word
sylvestre b35f2ef
fix typo
sylvestre ab0c4f5
ignore word
sylvestre 018c85b
ignore word
cakebaker 1a9df85
wc: remove "test" module without tests
cakebaker File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| // spell-checker:ignore Sapin | ||
| Copyright (c) Simon Sapin and many others | ||
|
|
||
| Permission is hereby granted, free of charge, to any | ||
| person obtaining a copy of this software and associated | ||
| documentation files (the "Software"), to deal in the | ||
| Software without restriction, including without | ||
| limitation the rights to use, copy, modify, merge, | ||
| publish, distribute, sublicense, and/or sell copies of | ||
| the Software, and to permit persons to whom the Software | ||
| is furnished to do so, subject to the following | ||
| conditions: | ||
|
|
||
| The above copyright notice and this permission notice | ||
| shall be included in all copies or substantial portions | ||
| of the Software. | ||
|
|
||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF | ||
| ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED | ||
| TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | ||
| PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
| SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
| CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | ||
| OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR | ||
| IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| DEALINGS IN THE SOFTWARE. | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| // spell-checker:ignore Sapin | ||
| mod read; | ||
cakebaker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| pub use read::{BufReadDecoder, BufReadDecoderError}; | ||
|
|
||
| use std::cmp; | ||
| use std::str; | ||
|
|
||
// Incremental, zero-copy UTF-8 decoding with error handling
//
// The original implementation was written by Simon Sapin in the utf-8 crate <https://crates.io/crates/utf-8>.
// uu_wc used to depend on that crate.
// The author archived the repository <https://github.com/SimonSapin/rust-utf8>.
// They suggested incorporating the source directly into uu_wc <https://github.com/uutils/coreutils/issues/4289>.

/// Up to four buffered bytes of a UTF-8 sequence that is still being assembled.
///
/// Only the first `buffer_len` bytes of `buffer` are in use.
#[derive(Debug, Copy, Clone)]
pub struct Incomplete {
    /// Storage for the buffered bytes; anything at `buffer_len..` is not meaningful.
    pub buffer: [u8; 4],
    /// Number of bytes of `buffer` currently in use.
    pub buffer_len: u8,
}

impl Incomplete {
    /// Returns a value with no buffered bytes.
    pub fn empty() -> Self {
        Self {
            buffer: [0, 0, 0, 0],
            buffer_len: 0,
        }
    }

    /// Returns `true` when no bytes are buffered.
    pub fn is_empty(&self) -> bool {
        self.buffer_len == 0
    }

    /// Creates a value holding a copy of `bytes`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is longer than the four-byte internal buffer.
    pub fn new(bytes: &[u8]) -> Self {
        let mut buffer = [0, 0, 0, 0];
        let len = bytes.len();
        // State the precondition explicitly instead of relying on the opaque
        // slice-index panic below; this also guarantees `len` fits in a `u8`.
        assert!(
            len <= buffer.len(),
            "Incomplete::new: got {} bytes, but at most {} can be buffered",
            len,
            buffer.len()
        );
        buffer[..len].copy_from_slice(bytes);
        Self {
            buffer,
            buffer_len: len as u8,
        }
    }

    /// Returns the buffered bytes and marks the buffer as empty.
    fn take_buffer(&mut self) -> &[u8] {
        let len = self.buffer_len as usize;
        self.buffer_len = 0;
        &self.buffer[..len]
    }

    /// Appends as much of `input` as fits into the buffer and validates the result.
    ///
    /// Returns one of:
    ///
    /// * `(consumed_from_input, None)`: not enough input
    /// * `(consumed_from_input, Some(Err(())))`: error bytes in buffer
    /// * `(consumed_from_input, Some(Ok(())))`: UTF-8 string in buffer
    ///
    /// # Panics
    ///
    /// Panics if the decoded or invalid prefix is shorter than what was
    /// already buffered on entry, i.e. if the buffer did not hold the prefix
    /// of a single, not yet decodable sequence.
    fn try_complete_offsets(&mut self, input: &[u8]) -> (usize, Option<Result<(), ()>>) {
        let initial_buffer_len = self.buffer_len as usize;

        // Fill the free tail of the buffer with the start of `input`.
        let unwritten = &mut self.buffer[initial_buffer_len..];
        let copied_from_input = cmp::min(unwritten.len(), input.len());
        unwritten[..copied_from_input].copy_from_slice(&input[..copied_from_input]);
        let spliced_len = initial_buffer_len + copied_from_input;

        let error = match str::from_utf8(&self.buffer[..spliced_len]) {
            Ok(_) => {
                self.buffer_len = spliced_len as u8;
                return (copied_from_input, Some(Ok(())));
            }
            Err(error) => error,
        };

        let valid_up_to = error.valid_up_to();
        if valid_up_to > 0 {
            // Keep only the valid prefix; the rest of the copied bytes stay
            // unconsumed in `input` from the caller's point of view.
            let consumed = valid_up_to
                .checked_sub(initial_buffer_len)
                .expect("valid prefix must extend past the previously buffered bytes");
            self.buffer_len = valid_up_to as u8;
            return (consumed, Some(Ok(())));
        }

        match error.error_len() {
            Some(invalid_sequence_length) => {
                let consumed = invalid_sequence_length
                    .checked_sub(initial_buffer_len)
                    .expect("invalid sequence must cover the previously buffered bytes");
                self.buffer_len = invalid_sequence_length as u8;
                (consumed, Some(Err(())))
            }
            None => {
                // Still a plausible prefix of a code point: keep everything copied.
                self.buffer_len = spliced_len as u8;
                (copied_from_input, None)
            }
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| // spell-checker:ignore bytestream | ||
| use super::*; | ||
sylvestre marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| use std::error::Error; | ||
| use std::fmt; | ||
| use std::io::{self, BufRead}; | ||
| use std::str; | ||
|
|
||
| /// Wraps a `std::io::BufRead` buffered byte stream and decode it as UTF-8. | ||
| pub struct BufReadDecoder<B: BufRead> { | ||
| buf_read: B, | ||
| bytes_consumed: usize, | ||
| incomplete: Incomplete, | ||
| } | ||
|
|
||
/// Error reported while decoding a buffered byte stream as UTF-8.
#[derive(Debug)]
pub enum BufReadDecoderError<'a> {
    /// Represents one UTF-8 error in the byte stream.
    ///
    /// The slice holds the offending bytes.
    /// In lossy decoding, each such error should be replaced with U+FFFD.
    InvalidByteSequence(&'a [u8]),

    /// An I/O error from the underlying byte stream
    Io(io::Error),
}

impl<'a> fmt::Display for BufReadDecoderError<'a> {
    /// Formats invalid bytes as a two-digit lowercase hex list, and I/O
    /// errors with the message of the wrapped error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidByteSequence(bytes) => {
                write!(f, "invalid byte sequence: {:02x?}", bytes)
            }
            Self::Io(err) => write!(f, "underlying bytestream error: {}", err),
        }
    }
}

impl<'a> Error for BufReadDecoderError<'a> {
    /// Returns the wrapped I/O error for `Io`; an invalid byte sequence has
    /// no underlying cause.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            Self::InvalidByteSequence(_) => None,
            Self::Io(err) => Some(err),
        }
    }
}
|
|
||
| impl<B: BufRead> BufReadDecoder<B> { | ||
| pub fn new(buf_read: B) -> Self { | ||
| Self { | ||
| buf_read, | ||
| bytes_consumed: 0, | ||
| incomplete: Incomplete::empty(), | ||
| } | ||
| } | ||
|
|
||
| /// Decode and consume the next chunk of UTF-8 input. | ||
| /// | ||
| /// This method is intended to be called repeatedly until it returns `None`, | ||
| /// which represents EOF from the underlying byte stream. | ||
| /// This is similar to `Iterator::next`, | ||
| /// except that decoded chunks borrow the decoder (~iterator) | ||
| /// so they need to be handled or copied before the next chunk can start decoding. | ||
| pub fn next_strict(&mut self) -> Option<Result<&str, BufReadDecoderError>> { | ||
| enum BytesSource { | ||
| BufRead(usize), | ||
| Incomplete, | ||
| } | ||
| macro_rules! try_io { | ||
| ($io_result: expr) => { | ||
publicmatt marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| match $io_result { | ||
| Ok(value) => value, | ||
| Err(error) => return Some(Err(BufReadDecoderError::Io(error))), | ||
| } | ||
| }; | ||
| } | ||
| let (source, result) = loop { | ||
| if self.bytes_consumed > 0 { | ||
| self.buf_read.consume(self.bytes_consumed); | ||
| self.bytes_consumed = 0; | ||
| } | ||
| let buf = try_io!(self.buf_read.fill_buf()); | ||
|
|
||
| // Force loop iteration to go through an explicit `continue` | ||
| enum Unreachable {} | ||
| let _: Unreachable = if self.incomplete.is_empty() { | ||
| if buf.is_empty() { | ||
| return None; // EOF | ||
| } | ||
| match str::from_utf8(buf) { | ||
| Ok(_) => break (BytesSource::BufRead(buf.len()), Ok(())), | ||
| Err(error) => { | ||
| let valid_up_to = error.valid_up_to(); | ||
| if valid_up_to > 0 { | ||
| break (BytesSource::BufRead(valid_up_to), Ok(())); | ||
| } | ||
| match error.error_len() { | ||
| Some(invalid_sequence_length) => { | ||
| break (BytesSource::BufRead(invalid_sequence_length), Err(())) | ||
| } | ||
| None => { | ||
| self.bytes_consumed = buf.len(); | ||
| self.incomplete = Incomplete::new(buf); | ||
| // need more input bytes | ||
| continue; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } else { | ||
| if buf.is_empty() { | ||
| break (BytesSource::Incomplete, Err(())); // EOF with incomplete code point | ||
| } | ||
| let (consumed, opt_result) = self.incomplete.try_complete_offsets(buf); | ||
| self.bytes_consumed = consumed; | ||
| match opt_result { | ||
| None => { | ||
| // need more input bytes | ||
| continue; | ||
| } | ||
| Some(result) => break (BytesSource::Incomplete, result), | ||
| } | ||
| }; | ||
| }; | ||
| let bytes = match source { | ||
| BytesSource::BufRead(byte_count) => { | ||
| self.bytes_consumed = byte_count; | ||
| let buf = try_io!(self.buf_read.fill_buf()); | ||
| &buf[..byte_count] | ||
| } | ||
| BytesSource::Incomplete => self.incomplete.take_buffer(), | ||
| }; | ||
| match result { | ||
| Ok(()) => Some(Ok(unsafe { str::from_utf8_unchecked(bytes) })), | ||
| Err(()) => Some(Err(BufReadDecoderError::InvalidByteSequence(bytes))), | ||
| } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.