Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions blobby/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## 0.4.0 (unreleased)
### Changed
- Edition changed to 2024 and MSRV bumped to 1.85 ([#1149])
- Replaced iterators with `const fn` parsing ([#1187])
- File format: the file header now contains the total number of stored blobs. ([#1207])

[#1149]: https://github.com/RustCrypto/utils/pull/1149
[#1187]: https://github.com/RustCrypto/utils/pull/1187
[#1207]: https://github.com/RustCrypto/utils/pull/1207

## 0.3.1 (2021-12-07)
### Added
Expand Down
27 changes: 14 additions & 13 deletions blobby/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
![Rust Version][rustc-image]
[![Project Chat][chat-image]][chat-link]

Iterators over a simple binary blob storage.
An encoding and decoding library for the Blobby (`blb`) file format, which serves as a simple,
deduplicated storage format for a sequence of binary blobs.

## Examples
```
// We recommend to save blobby data into separate files and
// use the `include_bytes!` macro
static BLOBBY_DATA: &[u8] = b"\x02\x05hello\x06world!\x01\x02 \x00\x03\x06:::\x03\x01\x00";
static BLOBBY_DATA: &[u8; 27] = b"\x08\x02\x05hello\x06world!\x01\x02 \x00\x03\x06:::\x03\x01\x00";

static SLICE: &[&[u8]] = blobby::parse_into_slice!(BLOBBY_DATA);

Expand Down Expand Up @@ -54,7 +55,7 @@ assert_eq!(
assert_eq!(ITEMS.len(), 2);
```

## Encoding and decoding
## Encoding and decoding utilities

This crate provides encoding and decoding utilities for converting between
the blobby format and text file with hex-encoded strings.
Expand Down Expand Up @@ -97,9 +98,7 @@ This file can be converted to the Blobby format by running the following command
cargo run --release --features alloc --bin encode -- /path/to/input.txt /path/to/output.blb
```

This will create a file which can be read using `blobby::Blob2Iterator`.

To see contents of an existing Blobby file you can use the following command:
To inspect contents of an existing Blobby file you can use the following command:
```sh
cargo run --release --features alloc --bin decode -- /path/to/input.blb /path/to/output.txt
```
Expand All @@ -109,20 +108,22 @@ in the input file.
## Storage format

Storage format represents a sequence of binary blobs. The format uses
git-flavored [variable-length quantity][0] (VLQ) for encoding unsigned
git-flavored [variable-length quantity][VLQ] (VLQ) for encoding unsigned
numbers.

File starts with a number of de-duplicated blobs `d`. It followed by `d`
entries. Each entry starts with an integer `m`, immediately followed by `m`
Blobby files start with two numbers: total number of blobs in the file `n` and
number of de-duplicated blobs `d`. The numbers are followed by `d` entries.
Each entry starts with an integer `m`, immediately followed by `m`
bytes representing de-duplicated binary blob.

Next follows unspecified number of entries representing sequence of stored
blobs. Each entry starts with an unsigned integer `n`. The least significant
Next follow `n` entries representing the sequence of stored blobs.
Each entry starts with an unsigned integer `l`. The least significant
bit of this integer is used as a flag. If the flag is equal to 0, then the
number is followed by `l >> 1` bytes, representing a stored binary blob.
Otherwise the entry references a de-duplicated entry number `n >> 1`.
Otherwise the entry references a de-duplicated entry number `l >> 1`
which should be smaller than `d`.

[0]: https://en.wikipedia.org/wiki/Variable-length_quantity
[VLQ]: https://en.wikipedia.org/wiki/Variable-length_quantity

## License

Expand Down
84 changes: 34 additions & 50 deletions blobby/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,51 +50,38 @@ macro_rules! try_read_vlq {
};
}

pub const fn parse_dedup_len(mut data: &[u8]) -> Result<usize, Error> {
read_vlq(&mut data)
/// Blobby file header
pub struct Header {
/// Number of blobs stored in the file
pub items_len: usize,
/// Number of deduplicated blobs
pub dedup_len: usize,
}

pub const fn parse_items_len(mut data: &[u8]) -> Result<usize, Error> {
let dedup_index_len = try_read_vlq!(data);

let mut i = 0;
while i < dedup_index_len {
let m = try_read_vlq!(data);
let split = data.split_at(m);
data = split.1;
i += 1;
}

let mut i = 0;
loop {
if data.is_empty() {
return Ok(i);
impl Header {
/// Parse blobby header
pub const fn parse(data: &mut &[u8]) -> Result<Self, Error> {
match (read_vlq(data), read_vlq(data)) {
(Ok(items_len), Ok(dedup_len)) => Ok(Header {
items_len,
dedup_len,
}),
(Err(err), _) | (Ok(_), Err(err)) => Err(err),
}
let val = try_read_vlq!(data);
// the least significant bit is used as a flag
let is_ref = (val & 1) != 0;
let val = val >> 1;
if is_ref {
if val >= dedup_index_len {
return Err(Error::InvalidIndex);
}
} else {
if val > data.len() {
return Err(Error::UnexpectedEnd);
}
let split = data.split_at(val);
data = split.1;
};
i += 1;
}
}

/// Parse blobby data into an array.
pub const fn parse_into_array<const ITEMS: usize, const DEDUP_LEN: usize>(
pub const fn parse_into_array<const ITEMS_LEN: usize, const DEDUP_LEN: usize>(
mut data: &[u8],
) -> Result<[&[u8]; ITEMS], Error> {
if try_read_vlq!(data) != DEDUP_LEN {
return Err(Error::BadArrayLen);
) -> Result<[&[u8]; ITEMS_LEN], Error> {
match Header::parse(&mut data) {
Ok(header) => {
if header.items_len != ITEMS_LEN || header.dedup_len != DEDUP_LEN {
return Err(Error::BadArrayLen);
}
}
Err(err) => return Err(err),
}

let mut dedup_index: [&[u8]; DEDUP_LEN] = [&[]; DEDUP_LEN];
Expand All @@ -108,7 +95,7 @@ pub const fn parse_into_array<const ITEMS: usize, const DEDUP_LEN: usize>(
i += 1;
}

let mut res: [&[u8]; ITEMS] = [&[]; ITEMS];
let mut res: [&[u8]; ITEMS_LEN] = [&[]; ITEMS_LEN];

let mut i = 0;
while i < res.len() {
Expand Down Expand Up @@ -144,7 +131,10 @@ pub const fn parse_into_array<const ITEMS: usize, const DEDUP_LEN: usize>(
pub fn parse_into_vec(mut data: &[u8]) -> Result<alloc::vec::Vec<&[u8]>, Error> {
use alloc::{vec, vec::Vec};

let dedup_len = try_read_vlq!(data);
let Header {
items_len,
dedup_len,
} = Header::parse(&mut data)?;

let mut dedup_index: Vec<&[u8]> = vec![&[]; dedup_len];

Expand All @@ -157,7 +147,6 @@ pub fn parse_into_vec(mut data: &[u8]) -> Result<alloc::vec::Vec<&[u8]>, Error>
i += 1;
}

let items_len = parse_items_len(data)?;
let mut res: Vec<&[u8]> = vec![&[]; items_len];

let mut i = 0;
Expand Down Expand Up @@ -189,20 +178,15 @@ pub fn parse_into_vec(mut data: &[u8]) -> Result<alloc::vec::Vec<&[u8]>, Error>
#[macro_export]
macro_rules! parse_into_slice {
($data:expr) => {{
const ITEMS_LEN: usize = {
match $crate::parse_items_len($data) {
const HEADER: $crate::Header = {
let mut data: &[u8] = $data;
match $crate::Header::parse(&mut data) {
Ok(v) => v,
Err(_) => panic!("Failed to parse items len"),
}
};
const DEDUP_LEN: usize = {
match $crate::parse_dedup_len($data) {
Ok(v) => v,
Err(_) => panic!("Failed to parse dedup len"),
}
};
const ITEMS: [&[u8]; ITEMS_LEN] = {
match $crate::parse_into_array::<ITEMS_LEN, DEDUP_LEN>($data) {
const ITEMS: [&[u8]; { HEADER.items_len }] = {
match $crate::parse_into_array::<{ HEADER.items_len }, { HEADER.dedup_len }>($data) {
Ok(v) => v,
Err(_) => panic!("Failed to parse items"),
}
Expand Down
43 changes: 25 additions & 18 deletions blobby/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,59 +30,66 @@ fn encode_vlq(mut val: usize, buf: &mut [u8; 4]) -> &[u8] {
/// Returns the encoded data together with a count of the number of blobs included in the index.
///
/// The encoded file format is:
/// - count of index entries=N
/// - N x index entries, each encoded as:
/// - number of blobs in the file = N
/// - number of deduplicated index entries = M
/// - M x index entries encoded as:
/// - size L of index entry (VLQ)
/// - index blob contents (L bytes)
/// - repeating encoded blobs, each encoded as:
/// - N x blobs encoded as:
/// - VLQ value that is either:
/// - (J << 1) | 0x01: indicates this blob is index entry J
/// - (L << 1) | 0x00: indicates an explicit blob of len L
/// - (in the latter case) explicit blob contents (L bytes)
pub fn encode_blobs<'a, I, T>(blobs: &'a I) -> (alloc::vec::Vec<u8>, usize)
pub fn encode_blobs<T>(blobs: &[T]) -> (alloc::vec::Vec<u8>, usize)
where
&'a I: IntoIterator<Item = &'a T>,
T: AsRef<[u8]> + 'a,
T: AsRef<[u8]>,
{
use alloc::{collections::BTreeMap, vec::Vec};

let mut idx_map = BTreeMap::new();
let mut dedup_map = BTreeMap::new();
blobs
.into_iter()
.iter()
.map(|v| v.as_ref())
.filter(|blob| !blob.is_empty())
.for_each(|blob| {
let v = idx_map.entry(blob.as_ref()).or_insert(0);
let v = dedup_map.entry(blob.as_ref()).or_insert(0);
*v += 1;
});

let mut idx: Vec<&[u8]> = idx_map
let mut dedup_list: Vec<&[u8]> = dedup_map
.iter()
.filter(|&(_, &v)| v > 1)
.map(|(&k, _)| k)
.collect();
idx.sort_by_key(|e| {
dedup_list.sort_by_key(|e| {
let k = match e {
[0] => 2,
[1] => 1,
_ => 0,
};
(k, idx_map.get(e).unwrap())
(k, dedup_map.get(e).unwrap())
});
idx.reverse();
let idx_len = idx.len();
dedup_list.reverse();
let idx_len = dedup_list.len();

let rev_idx: BTreeMap<&[u8], usize> = idx.iter().enumerate().map(|(i, &e)| (e, i)).collect();
let rev_idx: BTreeMap<&[u8], usize> = dedup_list
.iter()
.enumerate()
.map(|(i, &e)| (e, i))
.collect();

let mut out_buf = Vec::new();
let mut buf = [0u8; 4];
out_buf.extend_from_slice(encode_vlq(idx.len(), &mut buf));
for e in idx {

out_buf.extend_from_slice(encode_vlq(blobs.len(), &mut buf));
out_buf.extend_from_slice(encode_vlq(dedup_list.len(), &mut buf));

for e in dedup_list {
out_buf.extend_from_slice(encode_vlq(e.len(), &mut buf));
out_buf.extend_from_slice(e);
}

for blob in blobs.into_iter().map(|v| v.as_ref()) {
for blob in blobs.iter().map(|v| v.as_ref()) {
if let Some(dup_pos) = rev_idx.get(blob) {
let n = (dup_pos << 1) + 1usize;
out_buf.extend_from_slice(encode_vlq(n, &mut buf));
Expand Down
2 changes: 1 addition & 1 deletion blobby/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ extern crate alloc;
pub(crate) mod decode;
#[cfg(feature = "alloc")]
pub use decode::parse_into_vec;
pub use decode::{parse_dedup_len, parse_into_array, parse_items_len};
pub use decode::{Header, parse_into_array};

#[cfg(feature = "alloc")]
mod encode;
Expand Down
31 changes: 31 additions & 0 deletions blobby/tests/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#![cfg(feature = "alloc")]

// Total number of blobs stored in `TEST_BLOBS`.
const ITEMS_LEN: usize = 10;
// Expected number of deduplicated blobs, i.e. distinct blobs that occur
// more than once in `TEST_BLOBS` ("1", [42; 100_000], and [13; 7_000]).
const DEDUP_LEN: usize = 3;
// Test data mixes short literals with large repeated blobs so that both the
// deduplicated-index path and the inline-blob path of the encoder are
// exercised.
const TEST_BLOBS: &[&[u8]; ITEMS_LEN] = &[
    b"1",
    b"12",
    b"1",
    b"1",
    b"123",
    &[42; 100_000],
    &[42; 100_000],
    &[13; 7_000],
    &[13; 7_000],
    &[13; 5_000],
];

/// Round-trip test: encode `TEST_BLOBS`, then decode the result with both
/// the `const`-friendly array parser and the alloc-based vector parser,
/// asserting the decoded blobs match the original input exactly.
#[test]
fn blobby_roundtrip_test() -> Result<(), blobby::Error> {
    let (blobby_data, dedup_len) = blobby::encode_blobs(TEST_BLOBS);
    assert_eq!(dedup_len, DEDUP_LEN);
    // The encoded size is deterministic for fixed input; pin it so that
    // accidental format changes are caught by this test.
    assert_eq!(blobby_data.len(), 112_025);

    let decoded_blobs = blobby::parse_into_array::<ITEMS_LEN, DEDUP_LEN>(&blobby_data)?;
    assert_eq!(decoded_blobs, TEST_BLOBS[..]);

    let decoded_blobs = blobby::parse_into_vec(&blobby_data)?;
    assert_eq!(decoded_blobs, TEST_BLOBS[..]);

    Ok(())
}