-
Notifications
You must be signed in to change notification settings - Fork 543
Variable Length Quantity #147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| [package] | ||
| name = "variable-length-quantity" | ||
| version = "0.0.0" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
/// Convert a list of numbers to a stream of bytes encoded with variable length encoding.
///
/// Each value is split into 7-bit groups that are written most-significant
/// group first. Every byte of a value except its last one has the
/// continuation bit (`0x80`) set. The encodings of the individual values are
/// concatenated in input order; an empty input yields an empty vector.
pub fn to_bytes(values: &[u32]) -> Vec<u8> {
    // every value produces at least one byte, so this is a cheap lower bound
    let mut res = Vec::with_capacity(values.len());

    for &value in values {
        res.extend(to_bytes_single(value));
    }
    res
}

/// Encode one `u32` as a variable length quantity of one to five bytes.
fn to_bytes_single(mut value: u32) -> Vec<u8> {
    // 32 bits in groups of 7 need at most 5 bytes, so the buffer never grows
    let mut res = Vec::with_capacity(5);

    // The body runs at least once, so 0 is encoded as the single byte 0x00
    // without needing a special case.
    loop {
        // take the lower 7 bits
        let mut tmp = (value & 0x7f) as u8;
        // remove them from the original value
        value >>= 7;

        // Bytes are produced least-significant group first; every byte except
        // the first one produced (which ends up last) gets the continuation bit.
        if !res.is_empty() {
            tmp |= 0x80;
        }

        res.push(tmp);

        if value == 0 {
            break;
        }
    }

    // groups were pushed in reverse order, put the most significant one first
    res.reverse();
    res
}
|
|
||
| // Alternative solution with hard-coded range boundaries | ||
| // /// Convert a list of numbers to a stream of bytes encoded with variable length encoding. | ||
| // pub fn to_bytes(values: &[u32]) -> Vec<u8> { | ||
| // let mut res = vec![]; | ||
| // | ||
| // for &value in values { | ||
| // if value <= 0x7f { | ||
| // res.push(value as u8); | ||
| // } else if value <= 0x3fff { | ||
| // res.push(((value >> 7) & 0xff) as u8 | 0x80); | ||
| // res.push((value & 0x7f) as u8); | ||
| // } else if value <= 0x1f_ffff { | ||
| // res.push(((value >> 14) & 0xff) as u8 | 0x80); | ||
| // res.push(((value >> 7) & 0xff) as u8 | 0x80); | ||
| // res.push((value & 0x7f) as u8); | ||
| // } else if value <= 0x0fff_ffff { | ||
| // res.push(((value >> 21) & 0xff) as u8 | 0x80); | ||
| // res.push(((value >> 14) & 0xff) as u8 | 0x80); | ||
| // res.push(((value >> 7) & 0xff) as u8 | 0x80); | ||
| // res.push((value & 0x7f) as u8); | ||
| // } else { | ||
| // res.push(((value >> 28) & 0xff) as u8 | 0x80); | ||
| // res.push(((value >> 21) & 0xff) as u8 | 0x80); | ||
| // res.push(((value >> 14) & 0xff) as u8 | 0x80); | ||
| // res.push(((value >> 7) & 0xff) as u8 | 0x80); | ||
| // res.push((value & 0x7f) as u8); | ||
| // } | ||
| // } | ||
| // res | ||
| // } | ||
|
|
||
/// Given a stream of bytes, extract all numbers which are encoded in there.
///
/// Bytes are consumed in order. The low 7 bits of each byte are appended to
/// the number currently being decoded; a byte whose continuation bit (`0x80`)
/// is clear finishes that number.
///
/// # Errors
///
/// * `"Would overflow"` if a number would need more than 32 bits.
/// * `"Incomplete byte sequence"` if the input ends with a byte that still has
///   its continuation bit set, i.e. the last number is never terminated.
pub fn from_bytes(bytes: &[u8]) -> Result<Vec<u32>, &'static str> {
    let mut res = Vec::new();
    let mut tmp: u32 = 0;
    // True while the most recently read byte asked for a continuation.
    // Checking the accumulated value instead would miss unterminated
    // sequences whose payload bits are all zero (for example a lone 0x80).
    let mut pending = false;

    for &b in bytes {
        // if any of the top 7 bits is set, shifting left by 7 would lose data
        if tmp & 0xfe00_0000 != 0 {
            return Err("Would overflow");
        }

        // append the 7 payload bits of b to tmp
        tmp = (tmp << 7) | u32::from(b & 0x7f);

        pending = b & 0x80 != 0;
        if !pending {
            // continuation bit not set, number is complete
            res.push(tmp);
            tmp = 0;
        }
    }

    // the last byte announced a follow-up byte that never arrived
    if pending {
        return Err("Incomplete byte sequence");
    }

    Ok(res)
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
/// Convert a list of numbers to a stream of bytes encoded with variable length encoding.
///
/// This is the unimplemented starting point; calling it panics. The panic
/// message includes the argument so the parameter is not reported as unused.
pub fn to_bytes(values: &[u32]) -> Vec<u8> {
    unimplemented!("Convert the values {:?} to a list of bytes", values)
}
|
|
||
/// Given a stream of bytes, extract all numbers which are encoded in there.
///
/// This is the unimplemented starting point; calling it panics. The panic
/// message includes the argument so the parameter is not reported as unused.
pub fn from_bytes(bytes: &[u8]) -> Result<Vec<u32>, &'static str> {
    unimplemented!("Convert the list of bytes {:?} to a list of numbers", bytes)
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| extern crate variable_length_quantity as vlq; | ||
|
|
||
/// Sample values whose expected encoding is a single byte equal to the value.
#[test]
fn to_single_byte() {
    let cases: [(u32, [u8; 1]); 3] = [
        (0x00, [0x00]),
        (0x40, [0x40]),
        (0x7f, [0x7f]),
    ];

    for &(value, expected) in cases.iter() {
        assert_eq!(&expected[..], vlq::to_bytes(&[value]).as_slice());
    }
}
|
|
||
| #[test] | ||
| #[ignore] | ||
| fn to_double_byte() { | ||
| assert_eq!(&[0x81, 0x00], vlq::to_bytes(&[0x80]).as_slice()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to my earlier comment, are there edge cases that these additional tests are capturing? I've found that tests that focus on one thing work best. So if each of these asserts is testing a unique implementation detail, then I'd make this 4 tests. But if getting the first assert to pass will also make the remaining 3 asserts pass, then I'd get rid of the additional asserts. |
||
| assert_eq!(&[0xc0, 0x00], vlq::to_bytes(&[0x2000]).as_slice()); | ||
| assert_eq!(&[0xff, 0x7f], vlq::to_bytes(&[0x3fff]).as_slice()); | ||
| } | ||
|
|
||
/// Sample values whose expected encoding is three bytes long.
#[test]
#[ignore]
fn to_triple_byte() {
    let cases: [(u32, [u8; 3]); 3] = [
        (0x4000, [0x81, 0x80, 0x00]),
        (0x10_0000, [0xc0, 0x80, 0x00]),
        (0x1f_ffff, [0xff, 0xff, 0x7f]),
    ];

    for &(value, expected) in cases.iter() {
        assert_eq!(&expected[..], vlq::to_bytes(&[value]).as_slice());
    }
}
|
|
||
/// Sample values whose expected encoding is four bytes long.
#[test]
#[ignore]
fn to_quadruple_byte() {
    let cases: [(u32, [u8; 4]); 3] = [
        (0x20_0000, [0x81, 0x80, 0x80, 0x00]),
        (0x0800_0000, [0xc0, 0x80, 0x80, 0x00]),
        (0x0fff_ffff, [0xff, 0xff, 0xff, 0x7f]),
    ];

    for &(value, expected) in cases.iter() {
        assert_eq!(&expected[..], vlq::to_bytes(&[value]).as_slice());
    }
}
|
|
||
/// Sample values whose expected encoding is five bytes long, up to `u32::MAX`.
#[test]
#[ignore]
fn to_quintuple_byte() {
    let cases: [(u32, [u8; 5]); 3] = [
        (0x1000_0000, [0x81, 0x80, 0x80, 0x80, 0x00]),
        (0xff00_0000, [0x8f, 0xf8, 0x80, 0x80, 0x00]),
        (0xffff_ffff, [0x8f, 0xff, 0xff, 0xff, 0x7f]),
    ];

    for &(value, expected) in cases.iter() {
        assert_eq!(&expected[..], vlq::to_bytes(&[value]).as_slice());
    }
}
|
|
||
/// Decodes single numbers from encodings of one to five bytes.
#[test]
#[ignore]
fn from_bytes() {
    let cases: [(&[u8], u32); 5] = [
        (&[0x7f], 0x7f),
        (&[0xc0, 0x00], 0x2000),
        (&[0xff, 0xff, 0x7f], 0x1f_ffff),
        (&[0x81, 0x80, 0x80, 0x00], 0x20_0000),
        (&[0x8f, 0xff, 0xff, 0xff, 0x7f], 0xffff_ffff),
    ];

    for &(encoded, expected) in cases.iter() {
        assert_eq!(&[expected], vlq::from_bytes(encoded).unwrap().as_slice());
    }
}
|
|
||
|
|
||
/// Encoding several values yields the concatenation of their encodings.
#[test]
#[ignore]
fn to_bytes_multiple_values() {
    let cases: [(&[u32], &[u8]); 3] = [
        (&[0x40, 0x7f], &[0x40, 0x7f]),
        (&[0x4000, 0x12_3456], &[0x81, 0x80, 0x00, 0xc8, 0xe8, 0x56]),
        (&[0x2000, 0x12_3456, 0x0fff_ffff, 0x00, 0x3fff, 0x4000],
         &[0xc0, 0x00, 0xc8, 0xe8, 0x56, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0x7f, 0x81, 0x80,
           0x00]),
    ];

    for &(values, expected) in cases.iter() {
        assert_eq!(expected, vlq::to_bytes(values).as_slice());
    }
}
|
|
||
/// Decodes a byte stream that holds six numbers back to back.
#[test]
#[ignore]
fn from_bytes_multiple_values() {
    let encoded: [u8; 15] = [0xc0, 0x00, 0xc8, 0xe8, 0x56, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff,
                             0x7f, 0x81, 0x80, 0x00];
    let expected: [u32; 6] = [0x2000, 0x12_3456, 0x0fff_ffff, 0x00, 0x3fff, 0x4000];

    let decoded = vlq::from_bytes(&encoded).unwrap();
    assert_eq!(&expected, decoded.as_slice());
}
|
|
||
/// A lone byte with the continuation bit set must be rejected.
#[test]
#[ignore]
fn incomplete_byte_sequence() {
    let decoded = vlq::from_bytes(&[0xff]);
    assert!(decoded.is_err());
}
|
|
||
/// Five bytes carrying 35 set payload bits do not fit into a `u32`.
#[test]
#[ignore]
fn overflow_u32() {
    let decoded = vlq::from_bytes(&[0xff, 0xff, 0xff, 0xff, 0x7f]);
    assert!(decoded.is_err());
}
|
|
||
/// Encoding a list of numbers and decoding the result gives the list back.
#[test]
#[ignore]
fn chained_execution_is_identity() {
    let original: [u32; 10] = [0xf2, 0xf6, 0x96, 0x9c, 0x3b, 0x39, 0x2e, 0x30, 0xb3, 0x24];

    let encoded = vlq::to_bytes(&original);
    let decoded = vlq::from_bytes(&encoded).unwrap();

    assert_eq!(&original, decoded.as_slice());
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a value to having both of these tests? Is there an implementation detail that's tested in the second assert that is not tested in the first assert?