Skip to content
This repository was archived by the owner on Jun 16, 2020. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 7 additions & 49 deletions src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use std::cmp::min;
use std::collections::HashSet;
use std::result;
use std::str;
use std::vec::Vec;

use limits::{
Expand Down Expand Up @@ -1323,55 +1324,12 @@ impl<'a> ValidatingParser<'a> {
}

fn check_utf8(&self, bytes: &[u8]) -> ValidatorResult<'a, ()> {
// https://encoding.spec.whatwg.org/#utf-8
let mut bytes_needed = 0;
let mut lower_boundary = 0x80;
let mut upper_boundary = 0xBF;
for byte_ref in bytes {
let byte = *byte_ref;
if bytes_needed == 0 {
if byte <= 0x7F {
continue;
}
if 0xC2 <= byte && byte <= 0xDF {
bytes_needed = 1;
continue;
}
if 0xE0 <= byte && byte <= 0xEF {
bytes_needed = 2;
if byte == 0xE0 {
lower_boundary = 0xA0;
}
if byte == 0xED {
upper_boundary = 0x9F;
}
continue;
}
if 0xF0 <= byte && byte <= 0xF4 {
bytes_needed = 3;
if byte == 0xF0 {
lower_boundary = 0x90;
}
if byte == 0xF4 {
upper_boundary = 0x8F;
}
continue;
}
return self.create_error("Invalid utf-8: unexpected code");
}

if byte < lower_boundary || byte > upper_boundary {
return self.create_error("Invalid utf-8: code point out of range");
}

lower_boundary = 0x80;
upper_boundary = 0xBF;
bytes_needed -= 1;
}
if bytes_needed > 0 {
self.create_error("Invalid utf-8: unexpected end of string")
} else {
Ok(())
match str::from_utf8(bytes) {
Ok(_) => Ok(()),
Err(utf8_error) => match utf8_error.error_len() {
None => self.create_error("Invalid utf-8: unexpected end of string"),
Some(_) => self.create_error("Invalid utf-8: unexpected byte"),
},
}
}

Expand Down