From 207f00be772a1ce43528c0e12dc332b210f53eaa Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 12 Apr 2020 18:11:14 -0400 Subject: [PATCH 1/5] Add unit test for rejecting invalid UTF-8 --- src/uri/authority.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/uri/authority.rs b/src/uri/authority.rs index 44f93cc4..4752171e 100644 --- a/src/uri/authority.rs +++ b/src/uri/authority.rs @@ -616,4 +616,14 @@ mod tests { let err = Authority::parse_non_empty(b"[fe80::1:2:3:4]%20").unwrap_err(); assert_eq!(err.0, ErrorKind::InvalidAuthority); } + + #[test] + fn rejects_invalid_utf8() { + let err = Authority::try_from([0xc0u8].as_ref()).unwrap_err(); + assert_eq!(err.0, ErrorKind::InvalidUriChar); + + let err = Authority::from_shared(Bytes::from_static([0xc0u8].as_ref())) + .unwrap_err(); + assert_eq!(err.0, ErrorKind::InvalidUriChar); + } } From 9c51e0b0e85db19e91cf53b103da8c9d005b0c50 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 12 Apr 2020 21:55:40 -0400 Subject: [PATCH 2/5] Add Authority::from_static() test --- src/uri/authority.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/uri/authority.rs b/src/uri/authority.rs index 4752171e..7ee81ae0 100644 --- a/src/uri/authority.rs +++ b/src/uri/authority.rs @@ -529,6 +529,12 @@ mod tests { assert_eq!("EXAMPLE.com", authority); } + #[test] + fn from_static_equates_with_a_str() { + let authority = Authority::from_static("example.com"); + assert_eq!(authority, "example.com"); + } + #[test] fn not_equal_with_a_str_of_a_different_authority() { let authority: Authority = "example.com".parse().unwrap(); From d181a44f7bc2f7136facc2dd35bcdcd28678d79f Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Tue, 14 Apr 2020 10:41:12 -0400 Subject: [PATCH 3/5] Refactor uri::Authority Extract the common code from three ways of creating an Authority into a private create_authority() function. --- src/uri/authority.rs | 55 ++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/src/uri/authority.rs b/src/uri/authority.rs index 7ee81ae0..d0ecd747 100644 --- a/src/uri/authority.rs +++ b/src/uri/authority.rs @@ -23,15 +23,7 @@ impl Authority { // Not public while `bytes` is unstable. pub(super) fn from_shared(s: Bytes) -> Result { - let authority_end = Authority::parse_non_empty(&s[..])?; - - if authority_end != s.len() { - return Err(ErrorKind::InvalidUriChar.into()); - } - - Ok(Authority { - data: unsafe { ByteStr::from_utf8_unchecked(s) }, - }) + create_authority(s, |s| s) } /// Attempt to convert an `Authority` from a static string. @@ -52,18 +44,8 @@ impl Authority { /// assert_eq!(authority.host(), "example.com"); /// ``` pub fn from_static(src: &'static str) -> Self { - let s = src.as_bytes(); - let b = Bytes::from_static(s); - let authority_end = - Authority::parse_non_empty(&b[..]).expect("static str is not valid authority"); - - if authority_end != b.len() { - panic!("static str is not valid authority"); - } - - Authority { - data: unsafe { ByteStr::from_utf8_unchecked(b) }, - } + Authority::from_shared(Bytes::from_static(src.as_bytes())) + .expect("static str is not valid authority") } @@ -432,17 +414,7 @@ impl<'a> TryFrom<&'a [u8]> for Authority { #[inline] fn try_from(s: &'a [u8]) -> Result { // parse first, and only turn into Bytes if valid - let end = Authority::parse_non_empty(s)?; - - if end != s.len() { - return Err(ErrorKind::InvalidAuthority.into()); - } - - Ok(Authority { - data: unsafe { - ByteStr::from_utf8_unchecked(Bytes::copy_from_slice(s)) - }, - }) + create_authority(s, |s| Bytes::copy_from_slice(s)) } } @@ -494,6 +466,25 @@ fn host(auth: &str) -> &str { } } +fn create_authority(b: B, f: F) -> Result +where + B: AsRef<[u8]>, + F: FnOnce(B) -> Bytes, +{ + let s = b.as_ref(); + let authority_end = Authority::parse_non_empty(s)?; + + if authority_end != s.len() { + return Err(ErrorKind::InvalidUriChar.into()); + } + + let bytes = f(b); + + Ok(Authority { + data: unsafe { ByteStr::from_utf8_unchecked(bytes) }, + }) +} + #[cfg(test)] mod tests { use super::*; From 8e5ef6b9abbcabddcbb023ec9a489a5faa9976d6 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Tue, 14 Apr 2020 17:16:58 -0400 Subject: [PATCH 4/5] Add comments to explain the safety of Authority The comments describe the preconditions and postconditions that together ensure that the one use of 'unsafe' in uri/authority.rs is sound. --- src/uri/authority.rs | 19 +++++++++++++++++++ src/uri/mod.rs | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/src/uri/authority.rs b/src/uri/authority.rs index d0ecd747..baf8552d 100644 --- a/src/uri/authority.rs +++ b/src/uri/authority.rs @@ -23,6 +23,8 @@ impl Authority { // Not public while `bytes` is unstable. pub(super) fn from_shared(s: Bytes) -> Result { + // Preconditon on create_authority: trivially satisfied by the + // identity clousre create_authority(s, |s| s) } @@ -65,6 +67,8 @@ impl Authority { } // Note: this may return an *empty* Authority. You might want `parse_non_empty`. + // Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where + // ret is the return value. pub(super) fn parse(s: &[u8]) -> Result { let mut colon_cnt = 0; let mut start_bracket = false; @@ -73,6 +77,10 @@ impl Authority { let mut end = s.len(); let mut at_sign_pos = None; + // Among other things, this loop checks that every byte in s up to the + // first '/', '?', or '#' is a valid URI character (or in some contexts, + // a '%'). This means that each such byte is a valid single-byte UTF-8 + // code point. for (i, &b) in s.iter().enumerate() { match URI_CHARS[b as usize] { b'/' | b'?' | b'#' => { @@ -150,6 +158,9 @@ impl Authority { // // This should be used by functions that allow a user to parse // an `Authority` by itself. + // + // Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where + // ret is the return value. fn parse_non_empty(s: &[u8]) -> Result { if s.is_empty() { return Err(ErrorKind::Empty.into()); @@ -414,6 +425,9 @@ impl<'a> TryFrom<&'a [u8]> for Authority { #[inline] fn try_from(s: &'a [u8]) -> Result { // parse first, and only turn into Bytes if valid + + // Preconditon on create_authority: copy_from_slice() copies all of + // bytes from the [u8] parameter into a new Bytes create_authority(s, |s| Bytes::copy_from_slice(s)) } } @@ -466,6 +480,8 @@ fn host(auth: &str) -> &str { } } +// Precondition: f converts all of the bytes in the passed in B into the +// returned Bytes. fn create_authority(b: B, f: F) -> Result where B: AsRef<[u8]>, @@ -481,6 +497,9 @@ where let bytes = f(b); Ok(Authority { + // Safety: the postcondition on parse_non_empty() and the check against + // s.len() ensure that b is valid UTF-8. The precondition on f ensures + // that this is carried through to bytes. data: unsafe { ByteStr::from_utf8_unchecked(bytes) }, }) } diff --git a/src/uri/mod.rs b/src/uri/mod.rs index 716ea1ad..144c6208 100644 --- a/src/uri/mod.rs +++ b/src/uri/mod.rs @@ -143,6 +143,12 @@ enum ErrorKind { // u16::MAX is reserved for None const MAX_LEN: usize = (u16::MAX - 1) as usize; +// URI_CHARS is a table of valid characters in a URI. An entry in the table is +// 0 for invalid characters. For valid characters the entry is itself (i.e. +// the entry for 33 is b'!' because b'!' == 33u8). An important characteristic +// of this table is that all entries above 127 are invalid. This makes all of the +// valid entries a valid single-byte UTF-8 code point. This means that a slice +// of such valid entries is valid UTF-8. const URI_CHARS: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x From 2a63fb26fd30c32776bbf5d209b3dfb4e2994cae Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Wed, 15 Apr 2020 19:30:24 -0400 Subject: [PATCH 5/5] Fix typo --- src/uri/authority.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uri/authority.rs b/src/uri/authority.rs index baf8552d..f748d3ed 100644 --- a/src/uri/authority.rs +++ b/src/uri/authority.rs @@ -23,7 +23,7 @@ impl Authority { // Not public while `bytes` is unstable. pub(super) fn from_shared(s: Bytes) -> Result { - // Preconditon on create_authority: trivially satisfied by the + // Precondition on create_authority: trivially satisfied by the // identity clousre create_authority(s, |s| s) }