From 14ea2dc7428bfa14298f53b677952e3a75bc79fa Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Fri, 27 Jun 2025 13:24:59 -0400 Subject: [PATCH 1/7] Make ObjectBuilder faster --- parquet-variant/src/builder.rs | 141 +++++++++++---------------------- 1 file changed, 46 insertions(+), 95 deletions(-) diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index fda15c2b4336..6d9cd143d657 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -16,7 +16,7 @@ // under the License. use crate::decoder::{VariantBasicType, VariantPrimitiveType}; use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; -use std::collections::BTreeMap; +use std::collections::HashMap; const BASIC_TYPE_BITS: u8 = 2; const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); @@ -233,14 +233,14 @@ impl ValueBuffer { #[derive(Default)] struct MetadataBuilder { - field_name_to_id: BTreeMap, + field_name_to_id: HashMap, field_names: Vec, } impl MetadataBuilder { /// Upsert field name to dictionary, return its ID fn upsert_field_name(&mut self, field_name: &str) -> u32 { - use std::collections::btree_map::Entry; + use std::collections::hash_map::Entry; match self.field_name_to_id.entry(field_name.to_string()) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { @@ -256,6 +256,10 @@ impl MetadataBuilder { self.field_names.len() } + fn field_name(&self, i: usize) -> &str { + &self.field_names[i] + } + fn metadata_size(&self) -> usize { self.field_names.iter().map(|k| k.len()).sum() } @@ -567,7 +571,7 @@ impl<'a> ListBuilder<'a> { pub struct ObjectBuilder<'a, 'b> { parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder, - fields: BTreeMap, // (field_id, offset) + fields: Vec<(u32, usize)>, // (field_id, offset) buffer: ValueBuffer, /// Is there a pending list or object that needs to be finalized? pending: Option<(&'b str, usize)>, @@ -578,19 +582,26 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { Self { parent_buffer, metadata_builder, - fields: BTreeMap::new(), + fields: Vec::new(), buffer: ValueBuffer::default(), pending: None, } } + fn upsert_field(&mut self, field_id: u32, field_start: usize) { + match self.fields.iter().position(|&(id, _)| id == field_id) { + Some(i) => self.fields[i] = (field_id, field_start), + None => self.fields.push((field_id, field_start)), + } + } + fn check_pending_field(&mut self) { let Some((field_name, field_start)) = self.pending.as_ref() else { return; }; let field_id = self.metadata_builder.upsert_field_name(field_name); - self.fields.insert(field_id, *field_start); + self.upsert_field(field_id, *field_start); self.pending = None; } @@ -605,7 +616,7 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let field_id = self.metadata_builder.upsert_field_name(key); let field_start = self.buffer.offset(); - self.fields.insert(field_id, field_start); + self.upsert_field(field_id, field_start); self.buffer.append_non_nested_value(value); } @@ -643,16 +654,15 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let num_fields = self.fields.len(); let is_large = num_fields > u8::MAX as usize; - let field_ids_by_sorted_field_name = self - .metadata_builder - .field_name_to_id - .iter() - .filter_map(|(_, id)| self.fields.contains_key(id).then_some(*id)) - .collect::>(); + self.fields.sort_by(|a, b| { + let key_a = &self.metadata_builder.field_name(a.0 as usize); + let key_b = &self.metadata_builder.field_name(b.0 as usize); + key_a.cmp(key_b) + }); - let max_id = self.fields.keys().last().copied().unwrap_or(0) as usize; + let max_id = self.fields.iter().map(|&(id, _)| id).max().unwrap_or(0); - let id_size = int_size(max_id); + let id_size = int_size(max_id as usize); let offset_size = int_size(data_size); // Write header @@ -664,13 +674,12 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { ); // Write field IDs (sorted order) - for id in &field_ids_by_sorted_field_name { - write_offset(self.parent_buffer.inner_mut(), *id as usize, id_size); + for &(id, _) in &self.fields { + write_offset(self.parent_buffer.inner_mut(), id as usize, id_size); } // Write field offsets - for id in &field_ids_by_sorted_field_name { - let &offset = self.fields.get(id).unwrap(); + for &(_, offset) in &self.fields { write_offset(self.parent_buffer.inner_mut(), offset, offset_size); } @@ -861,75 +870,6 @@ mod tests { assert_eq!(field_ids, vec![1, 2, 0]); } - #[test] - fn test_object_and_metadata_ordering() { - let mut builder = VariantBuilder::new(); - - let mut obj = builder.new_object(); - - obj.insert("zebra", "stripes"); // ID = 0 - obj.insert("apple", "red"); // ID = 1 - - { - // fields_map is ordered by insertion order (field id) - let fields_map = obj.fields.keys().copied().collect::>(); - assert_eq!(fields_map, vec![0, 1]); - - // dict is ordered by field names - let dict_metadata = obj - .metadata_builder - .field_name_to_id - .iter() - .map(|(f, i)| (f.as_str(), *i)) - .collect::>(); - - assert_eq!(dict_metadata, vec![("apple", 1), ("zebra", 0)]); - - // dict_keys is ordered by insertion order (field id) - let dict_keys = obj - .metadata_builder - .field_names - .iter() - .map(|k| k.as_str()) - .collect::>(); - assert_eq!(dict_keys, vec!["zebra", "apple"]); - } - - obj.insert("banana", "yellow"); // ID = 2 - - { - // fields_map is ordered by insertion order (field id) - let fields_map = obj.fields.keys().copied().collect::>(); - assert_eq!(fields_map, vec![0, 1, 2]); - - // dict is ordered by field names - let dict_metadata = obj - .metadata_builder - .field_name_to_id - .iter() - .map(|(f, i)| (f.as_str(), *i)) - .collect::>(); - - assert_eq!( - dict_metadata, - vec![("apple", 1), ("banana", 2), ("zebra", 0)] - ); - - // dict_keys is ordered by insertion order (field id) - let dict_keys = obj - .metadata_builder - .field_names - .iter() - .map(|k| k.as_str()) - .collect::>(); - assert_eq!(dict_keys, vec!["zebra", "apple", "banana"]); - } - - obj.finish(); - - builder.finish(); - } - #[test] fn test_duplicate_fields_in_object() { let mut builder = VariantBuilder::new(); @@ -1242,8 +1182,10 @@ mod tests { /* { "c": { + "b": false, "c": "a" - } + }, + "b": false, } */ @@ -1253,10 +1195,17 @@ mod tests { let mut outer_object_builder = builder.new_object(); { let mut inner_object_builder = outer_object_builder.new_object("c"); + inner_object_builder.insert("b", false); inner_object_builder.insert("c", "a"); + inner_object_builder.finish(); } + outer_object_builder.insert("b", false); + + // note, we can't guarantee an Objects field is sorted by field id. + assert_eq!(outer_object_builder.fields, vec![(1, 0), (0, 10)]); + outer_object_builder.finish(); } @@ -1264,15 +1213,17 @@ mod tests { let variant = Variant::try_new(&metadata, &value).unwrap(); let outer_object = variant.as_object().unwrap(); - assert_eq!(outer_object.len(), 1); - assert_eq!(outer_object.field_name(0).unwrap(), "c"); + assert_eq!(outer_object.len(), 2); + assert_eq!(outer_object.field_name(0).unwrap(), "b"); - let inner_object_variant = outer_object.field(0).unwrap(); + let inner_object_variant = outer_object.field(1).unwrap(); let inner_object = inner_object_variant.as_object().unwrap(); - assert_eq!(inner_object.len(), 1); - assert_eq!(inner_object.field_name(0).unwrap(), "c"); - assert_eq!(inner_object.field(0).unwrap(), Variant::from("a")); + assert_eq!(inner_object.len(), 2); + assert_eq!(inner_object.field_name(0).unwrap(), "b"); + assert_eq!(inner_object.field(0).unwrap(), Variant::from(false)); + assert_eq!(inner_object.field_name(1).unwrap(), "c"); + assert_eq!(inner_object.field(1).unwrap(), Variant::from("a")); } #[test] From ef8d91dcc03d780a7a8890673541b23165bb376e Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Sat, 28 Jun 2025 09:20:00 -0400 Subject: [PATCH 2/7] Add benches --- parquet-variant/Cargo.toml | 12 +- parquet-variant/benches/builder.rs | 178 +++++++++++++++++++++++++++++ parquet-variant/src/builder.rs | 18 ++- 3 files changed, 203 insertions(+), 5 deletions(-) create mode 100644 parquet-variant/benches/builder.rs diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index 6bec373d0204..a43dd7a804c2 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -40,6 +40,16 @@ base64 = "0.22" [dev-dependencies] paste = { version = "1.0" } - +criterion = { version = "0.6", default-features = false } +rand = { version = "0.9", default-features = false, features = [ + "std", + "std_rng", + "thread_rng", +] } [lib] + + +[[bench]] +name = "builder" +harness = false diff --git a/parquet-variant/benches/builder.rs b/parquet-variant/benches/builder.rs new file mode 100644 index 000000000000..92accef07e2a --- /dev/null +++ b/parquet-variant/benches/builder.rs @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate parquet_variant; + +use criterion::*; + +use parquet_variant::VariantBuilder; +use rand::{ + distr::{uniform::SampleUniform, Alphanumeric}, + rngs::ThreadRng, + Rng, +}; +use std::{hint, ops::Range}; + +fn random(rng: &mut ThreadRng, range: Range) -> T { + rng.random_range::(range) +} + +// generates a string with a 50/50 chance whether it's a short or a long string +fn random_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(1..128); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +// generates a string guaranteed to be longer than 64 bytes +fn random_long_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(65..200); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +// Creates an object with field names inserted in reverse lexicographical order +fn bench_object_field_names_reverse_order(c: &mut Criterion) { + c.bench_function("bench_object_field_names_reverse_order", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + for i in 0..50_000 { + object_builder.insert( + format!("{}", 1000 - i).as_str(), + random_string(&mut rng).as_str(), + ); + } + + object_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates a list of objects with the same schema (same field names) +/* + { + name: String, + age: i32, + likes_cilantro: bool, + comments: Long string + dishes: Vec + } +*/ +fn bench_object_list_same_schemas(c: &mut Criterion) { + c.bench_function("bench_object_list_same_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..25_000 { + let mut object_builder = list_builder.new_object(); + object_builder.insert("name", random_string(&mut rng).as_str()); + object_builder.insert("age", random::(&mut rng, 18..100) as i32); + object_builder.insert("likes_cilantro", rng.random_bool(0.5)); + object_builder.insert("comments", random_long_string(&mut rng).as_str()); + + let mut list_builder = object_builder.new_list("dishes"); + list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(random_string(&mut rng).as_str()); + + list_builder.finish(); + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates a list of variant objects with an undefined schema (random field names) +// values are randomly generated, with an equal distribution to whether it's a String, Object, or List +fn bench_object_list_unknown_schema(c: &mut Criterion) { + c.bench_function("bench_object_list_unknown_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..200 { + let mut object_builder = list_builder.new_object(); + + for _num_fields in 0..random::(&mut rng, 0..100) { + if rng.random_bool(0.33) { + object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + continue; + } + + if rng.random_bool(0.5) { + let mut inner_object_builder = object_builder.new_object("rand_object"); + + for _num_fields in 0..random::(&mut rng, 0..25) { + inner_object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + } + inner_object_builder.finish(); + + continue; + } + + let mut inner_list_builder = object_builder.new_list("rand_list"); + + for _num_elements in 0..random::(&mut rng, 0..25) { + inner_list_builder.append_value(random_string(&mut rng).as_str()); + } + + inner_list_builder.finish(); + } + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +criterion_group!( + benches, + bench_object_field_names_reverse_order, + bench_object_list_same_schemas, + bench_object_list_unknown_schema, +); + +criterion_main!(benches); diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index 6d9cd143d657..74e4b5492bce 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -571,7 +571,8 @@ impl<'a> ListBuilder<'a> { pub struct ObjectBuilder<'a, 'b> { parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder, - fields: Vec<(u32, usize)>, // (field_id, offset) + fields: Vec<(u32, usize)>, // (field_id, offset) + field_id_to_index: HashMap, // (field_id, index to `fields`) buffer: ValueBuffer, /// Is there a pending list or object that needs to be finalized? pending: Option<(&'b str, usize)>, @@ -583,15 +584,24 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { parent_buffer, metadata_builder, fields: Vec::new(), + field_id_to_index: HashMap::new(), buffer: ValueBuffer::default(), pending: None, } } fn upsert_field(&mut self, field_id: u32, field_start: usize) { - match self.fields.iter().position(|&(id, _)| id == field_id) { - Some(i) => self.fields[i] = (field_id, field_start), - None => self.fields.push((field_id, field_start)), + use std::collections::hash_map::Entry; + + match self.field_id_to_index.entry(field_id) { + Entry::Occupied(occupied_entry) => { + let i = *occupied_entry.get(); + self.fields[i] = (field_id, field_start); + } + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(self.fields.len()); + self.fields.push((field_id, field_start)); + } } } From cbb17a5460d1def0f5a3460deb712afebf0600a8 Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Sat, 28 Jun 2025 20:05:08 -0400 Subject: [PATCH 3/7] Use index set --- parquet-variant/Cargo.toml | 1 + parquet-variant/src/builder.rs | 17 +++++------------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index a43dd7a804c2..24500c588816 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -37,6 +37,7 @@ arrow-schema = { workspace = true } chrono = { workspace = true } serde_json = "1.0" base64 = "0.22" +indexmap = "2.10.0" [dev-dependencies] paste = { version = "1.0" } diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index 74e4b5492bce..d9605204765d 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -16,6 +16,7 @@ // under the License. use crate::decoder::{VariantBasicType, VariantPrimitiveType}; use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; +use indexmap::IndexSet; use std::collections::HashMap; const BASIC_TYPE_BITS: u8 = 2; @@ -233,23 +234,15 @@ impl ValueBuffer { #[derive(Default)] struct MetadataBuilder { - field_name_to_id: HashMap, - field_names: Vec, + field_names: IndexSet, } impl MetadataBuilder { /// Upsert field name to dictionary, return its ID fn upsert_field_name(&mut self, field_name: &str) -> u32 { - use std::collections::hash_map::Entry; - match self.field_name_to_id.entry(field_name.to_string()) { - Entry::Occupied(entry) => *entry.get(), - Entry::Vacant(entry) => { - let id = self.field_names.len() as u32; - entry.insert(id); - self.field_names.push(field_name.to_string()); - id - } - } + let (id, _) = self.field_names.insert_full(field_name.to_string()); + + id as u32 } fn num_field_names(&self) -> usize { From d4b5bd8f57a88d3d2746ff4f898427876c5b820d Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Sat, 28 Jun 2025 22:30:23 -0400 Subject: [PATCH 4/7] Add more benchmarks --- parquet-variant/benches/builder.rs | 215 ++++++++++++++++++++++++++++- 1 file changed, 213 insertions(+), 2 deletions(-) diff --git a/parquet-variant/benches/builder.rs b/parquet-variant/benches/builder.rs index 92accef07e2a..c3ee9c87a866 100644 --- a/parquet-variant/benches/builder.rs +++ b/parquet-variant/benches/builder.rs @@ -73,6 +73,43 @@ fn bench_object_field_names_reverse_order(c: &mut Criterion) { }); } +// Creates objects with a homogenous schema (same field names) +/* + { + name: String, + age: i32, + likes_cilantro: bool, + comments: Long string + dishes: Vec + } +*/ +fn bench_object_same_schema(c: &mut Criterion) { + c.bench_function("bench_object_same_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + for _ in 0..25_000 { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + object_builder.insert("name", random_string(&mut rng).as_str()); + object_builder.insert("age", random::(&mut rng, 18..100) as i32); + object_builder.insert("likes_cilantro", rng.random_bool(0.5)); + object_builder.insert("comments", random_long_string(&mut rng).as_str()); + + let mut inner_list_builder = object_builder.new_list("dishes"); + inner_list_builder.append_value(random_string(&mut rng).as_str()); + inner_list_builder.append_value(random_string(&mut rng).as_str()); + inner_list_builder.append_value(random_string(&mut rng).as_str()); + + inner_list_builder.finish(); + object_builder.finish(); + + hint::black_box(variant.finish()); + } + }) + }); +} + // Creates a list of objects with the same schema (same field names) /* { @@ -83,7 +120,7 @@ fn bench_object_field_names_reverse_order(c: &mut Criterion) { dishes: Vec } */ -fn bench_object_list_same_schemas(c: &mut Criterion) { +fn bench_object_list_same_schema(c: &mut Criterion) { c.bench_function("bench_object_list_same_schema", |b| { b.iter(|| { let mut rng = rand::rng(); @@ -114,6 +151,55 @@ fn bench_object_list_same_schemas(c: &mut Criterion) { }); } +// Creates variant objects with an undefined schema (random field names) +// values are randomly generated, with an equal distribution to whether it's a String, Object, or List +fn bench_object_unknown_schema(c: &mut Criterion) { + c.bench_function("bench_object_unknown_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + for _ in 0..200 { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + for _num_fields in 0..random::(&mut rng, 0..100) { + if rng.random_bool(0.33) { + object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + continue; + } + + if rng.random_bool(0.5) { + let mut inner_object_builder = object_builder.new_object("rand_object"); + + for _num_fields in 0..random::(&mut rng, 0..25) { + inner_object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + } + inner_object_builder.finish(); + + continue; + } + + let mut inner_list_builder = object_builder.new_list("rand_list"); + + for _num_elements in 0..random::(&mut rng, 0..25) { + inner_list_builder.append_value(random_string(&mut rng).as_str()); + } + + inner_list_builder.finish(); + } + object_builder.finish(); + hint::black_box(variant.finish()); + } + }) + }); +} + // Creates a list of variant objects with an undefined schema (random field names) // values are randomly generated, with an equal distribution to whether it's a String, Object, or List fn bench_object_list_unknown_schema(c: &mut Criterion) { @@ -168,11 +254,136 @@ fn bench_object_list_unknown_schema(c: &mut Criterion) { }); } +// Creates objects with a homogenous schema (same field names) +/* + { + "id": &[u8], // Following are common across all objects + "span_id: &[u8], + "created": u32, + "ended": u32, + "span_name": String, + + "attributees": { + // following fields are randomized + } + } +*/ +fn bench_object_partially_same_schema(c: &mut Criterion) { + c.bench_function("bench_object_partially_same_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + for _ in 0..200 { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + object_builder.insert( + "id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert( + "span_id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert("created", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("ended", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("span_name", random_string(&mut rng).as_str()); + + { + let mut inner_object_builder = object_builder.new_object("attributes"); + + for _num_fields in 0..random::(&mut rng, 0..100) { + let random_key = random_string(&mut rng); + inner_object_builder.insert(&random_key, random_string(&mut rng).as_str()); + } + inner_object_builder.finish(); + } + + object_builder.finish(); + hint::black_box(variant.finish()); + } + }) + }); +} + +// Creates a list of variant objects with a partially homogenous schema (similar field names) +/* + { + "id": &[u8], // Following are common across all objects + "span_id: &[u8], + "created": u32, + "ended": u32, + "span_name": String, + + "attributees": { + // following fields are randomized + } + } +*/ +fn bench_object_list_partially_same_schema(c: &mut Criterion) { + c.bench_function("bench_object_list_partially_same_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..100 { + let mut object_builder = list_builder.new_object(); + + object_builder.insert( + "id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert( + "span_id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert("created", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("ended", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("span_name", random_string(&mut rng).as_str()); + + { + let mut inner_object_builder = object_builder.new_object("attributes"); + + for _num_fields in 0..random::(&mut rng, 0..100) { + let random_key = random_string(&mut rng); + inner_object_builder.insert(&random_key, random_string(&mut rng).as_str()); + } + inner_object_builder.finish(); + } + + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + criterion_group!( benches, bench_object_field_names_reverse_order, - bench_object_list_same_schemas, + bench_object_same_schema, + bench_object_list_same_schema, + bench_object_unknown_schema, bench_object_list_unknown_schema, + bench_object_partially_same_schema, + bench_object_list_partially_same_schema ); criterion_main!(benches); From b3324da46f9e4dfdc8cff8eecd9a93ef9c870451 Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Sun, 29 Jun 2025 09:02:55 -0400 Subject: [PATCH 5/7] Comments --- parquet-variant/Cargo.toml | 9 +- .../{builder.rs => variant_builder.rs} | 123 ++++++++++-------- parquet-variant/src/builder.rs | 23 ++-- 3 files changed, 83 insertions(+), 72 deletions(-) rename parquet-variant/benches/{builder.rs => variant_builder.rs} (77%) diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index 24500c588816..708b614cf4b7 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -39,6 +39,11 @@ serde_json = "1.0" base64 = "0.22" indexmap = "2.10.0" + +[lib] +name = "parquet_variant" +bench = false + [dev-dependencies] paste = { version = "1.0" } criterion = { version = "0.6", default-features = false } @@ -48,9 +53,7 @@ rand = { version = "0.9", default-features = false, features = [ "thread_rng", ] } -[lib] - [[bench]] -name = "builder" +name = "variant_builder" harness = false diff --git a/parquet-variant/benches/builder.rs b/parquet-variant/benches/variant_builder.rs similarity index 77% rename from parquet-variant/benches/builder.rs rename to parquet-variant/benches/variant_builder.rs index c3ee9c87a866..cce5fb076fdc 100644 --- a/parquet-variant/benches/builder.rs +++ b/parquet-variant/benches/variant_builder.rs @@ -41,30 +41,40 @@ fn random_string(rng: &mut ThreadRng) -> String { .collect() } -// generates a string guaranteed to be longer than 64 bytes -fn random_long_string(rng: &mut ThreadRng) -> String { - let len = rng.random_range::(65..200); +struct RandomStringGenerator { + cursor: usize, + table: Vec, +} - rng.sample_iter(&Alphanumeric) - .take(len) - .map(char::from) - .collect() +impl RandomStringGenerator { + pub fn new(rng: &mut ThreadRng, capacity: usize) -> Self { + let table = (0..capacity) + .map(|_| random_string(rng)) + .collect::>(); + + Self { cursor: 0, table } + } + + pub fn next(&mut self) -> &str { + let this = &self.table[self.cursor]; + + self.cursor = (self.cursor + 1) % self.table.len(); + + this + } } // Creates an object with field names inserted in reverse lexicographical order fn bench_object_field_names_reverse_order(c: &mut Criterion) { c.bench_function("bench_object_field_names_reverse_order", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); b.iter(|| { - let mut rng = rand::rng(); - let mut variant = VariantBuilder::new(); let mut object_builder = variant.new_object(); for i in 0..50_000 { - object_builder.insert( - format!("{}", 1000 - i).as_str(), - random_string(&mut rng).as_str(), - ); + object_builder.insert(format!("{}", 1000 - i).as_str(), string_table.next()); } object_builder.finish(); @@ -84,22 +94,23 @@ fn bench_object_field_names_reverse_order(c: &mut Criterion) { } */ fn bench_object_same_schema(c: &mut Criterion) { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + c.bench_function("bench_object_same_schema", |b| { b.iter(|| { - let mut rng = rand::rng(); - for _ in 0..25_000 { let mut variant = VariantBuilder::new(); let mut object_builder = variant.new_object(); - object_builder.insert("name", random_string(&mut rng).as_str()); + object_builder.insert("name", string_table.next()); object_builder.insert("age", random::(&mut rng, 18..100) as i32); object_builder.insert("likes_cilantro", rng.random_bool(0.5)); - object_builder.insert("comments", random_long_string(&mut rng).as_str()); + object_builder.insert("comments", string_table.next()); let mut inner_list_builder = object_builder.new_list("dishes"); - inner_list_builder.append_value(random_string(&mut rng).as_str()); - inner_list_builder.append_value(random_string(&mut rng).as_str()); - inner_list_builder.append_value(random_string(&mut rng).as_str()); + inner_list_builder.append_value(string_table.next()); + inner_list_builder.append_value(string_table.next()); + inner_list_builder.append_value(string_table.next()); inner_list_builder.finish(); object_builder.finish(); @@ -122,24 +133,25 @@ fn bench_object_same_schema(c: &mut Criterion) { */ fn bench_object_list_same_schema(c: &mut Criterion) { c.bench_function("bench_object_list_same_schema", |b| { - b.iter(|| { - let mut rng = rand::rng(); + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 101); + b.iter(|| { let mut variant = VariantBuilder::new(); let mut list_builder = variant.new_list(); for _ in 0..25_000 { let mut object_builder = list_builder.new_object(); - object_builder.insert("name", random_string(&mut rng).as_str()); + object_builder.insert("name", string_table.next()); object_builder.insert("age", random::(&mut rng, 18..100) as i32); object_builder.insert("likes_cilantro", rng.random_bool(0.5)); - object_builder.insert("comments", random_long_string(&mut rng).as_str()); + object_builder.insert("comments", string_table.next()); let mut list_builder = object_builder.new_list("dishes"); - list_builder.append_value(random_string(&mut rng).as_str()); - list_builder.append_value(random_string(&mut rng).as_str()); - list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(string_table.next()); + list_builder.append_value(string_table.next()); + list_builder.append_value(string_table.next()); list_builder.finish(); object_builder.finish(); @@ -155,19 +167,18 @@ fn bench_object_list_same_schema(c: &mut Criterion) { // values are randomly generated, with an equal distribution to whether it's a String, Object, or List fn bench_object_unknown_schema(c: &mut Criterion) { c.bench_function("bench_object_unknown_schema", |b| { - b.iter(|| { - let mut rng = rand::rng(); + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 1001); + b.iter(|| { for _ in 0..200 { let mut variant = VariantBuilder::new(); let mut object_builder = variant.new_object(); for _num_fields in 0..random::(&mut rng, 0..100) { if rng.random_bool(0.33) { - object_builder.insert( - random_string(&mut rng).as_str(), - random_string(&mut rng).as_str(), - ); + let key = string_table.next(); + object_builder.insert(key, key); continue; } @@ -175,10 +186,8 @@ fn bench_object_unknown_schema(c: &mut Criterion) { let mut inner_object_builder = object_builder.new_object("rand_object"); for _num_fields in 0..random::(&mut rng, 0..25) { - inner_object_builder.insert( - random_string(&mut rng).as_str(), - random_string(&mut rng).as_str(), - ); + let key = string_table.next(); + inner_object_builder.insert(key, key); } inner_object_builder.finish(); @@ -188,7 +197,7 @@ fn bench_object_unknown_schema(c: &mut Criterion) { let mut inner_list_builder = object_builder.new_list("rand_list"); for _num_elements in 0..random::(&mut rng, 0..25) { - inner_list_builder.append_value(random_string(&mut rng).as_str()); + inner_list_builder.append_value(string_table.next()); } inner_list_builder.finish(); @@ -204,6 +213,9 @@ fn bench_object_unknown_schema(c: &mut Criterion) { // values are randomly generated, with an equal distribution to whether it's a String, Object, or List fn bench_object_list_unknown_schema(c: &mut Criterion) { c.bench_function("bench_object_list_unknown_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 1001); + b.iter(|| { let mut rng = rand::rng(); @@ -215,11 +227,10 @@ fn bench_object_list_unknown_schema(c: &mut Criterion) { let mut object_builder = list_builder.new_object(); for _num_fields in 0..random::(&mut rng, 0..100) { + let key = string_table.next(); + if rng.random_bool(0.33) { - object_builder.insert( - random_string(&mut rng).as_str(), - random_string(&mut rng).as_str(), - ); + object_builder.insert(key, key); continue; } @@ -227,10 +238,8 @@ fn bench_object_list_unknown_schema(c: &mut Criterion) { let mut inner_object_builder = object_builder.new_object("rand_object"); for _num_fields in 0..random::(&mut rng, 0..25) { - inner_object_builder.insert( - random_string(&mut rng).as_str(), - random_string(&mut rng).as_str(), - ); + let key = string_table.next(); + inner_object_builder.insert(key, key); } inner_object_builder.finish(); @@ -240,7 +249,7 @@ fn bench_object_list_unknown_schema(c: &mut Criterion) { let mut inner_list_builder = object_builder.new_list("rand_list"); for _num_elements in 0..random::(&mut rng, 0..25) { - inner_list_builder.append_value(random_string(&mut rng).as_str()); + inner_list_builder.append_value(key); } inner_list_builder.finish(); @@ -270,6 +279,9 @@ fn bench_object_list_unknown_schema(c: &mut Criterion) { */ fn bench_object_partially_same_schema(c: &mut Criterion) { c.bench_function("bench_object_partially_same_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + b.iter(|| { let mut rng = rand::rng(); @@ -293,14 +305,14 @@ fn bench_object_partially_same_schema(c: &mut Criterion) { object_builder.insert("created", random::(&mut rng, 0..u32::MAX) as i32); object_builder.insert("ended", random::(&mut rng, 0..u32::MAX) as i32); - object_builder.insert("span_name", random_string(&mut rng).as_str()); + object_builder.insert("span_name", string_table.next()); { let mut inner_object_builder = object_builder.new_object("attributes"); for _num_fields in 0..random::(&mut rng, 0..100) { - let random_key = random_string(&mut rng); - inner_object_builder.insert(&random_key, random_string(&mut rng).as_str()); + let key = string_table.next(); + inner_object_builder.insert(key, key); } inner_object_builder.finish(); } @@ -328,9 +340,10 @@ fn bench_object_partially_same_schema(c: &mut Criterion) { */ fn bench_object_list_partially_same_schema(c: &mut Criterion) { c.bench_function("bench_object_list_partially_same_schema", |b| { - b.iter(|| { - let mut rng = rand::rng(); + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + b.iter(|| { let mut variant = VariantBuilder::new(); let mut list_builder = variant.new_list(); @@ -354,14 +367,14 @@ fn bench_object_list_partially_same_schema(c: &mut Criterion) { object_builder.insert("created", random::(&mut rng, 0..u32::MAX) as i32); object_builder.insert("ended", random::(&mut rng, 0..u32::MAX) as i32); - object_builder.insert("span_name", random_string(&mut rng).as_str()); + object_builder.insert("span_name", string_table.next()); { let mut inner_object_builder = object_builder.new_object("attributes"); for _num_fields in 0..random::(&mut rng, 0..100) { - let random_key = random_string(&mut rng); - inner_object_builder.insert(&random_key, random_string(&mut rng).as_str()); + let key = string_table.next(); + inner_object_builder.insert(key, key); } inner_object_builder.finish(); } diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index d9605204765d..a9000edbbb3f 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -17,7 +17,6 @@ use crate::decoder::{VariantBasicType, VariantPrimitiveType}; use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; use indexmap::IndexSet; -use std::collections::HashMap; const BASIC_TYPE_BITS: u8 = 2; const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); @@ -234,6 +233,7 @@ impl ValueBuffer { #[derive(Default)] struct MetadataBuilder { + // Field names -- field_ids are assigned in insert order field_names: IndexSet, } @@ -564,8 +564,8 @@ impl<'a> ListBuilder<'a> { pub struct ObjectBuilder<'a, 'b> { parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder, - fields: Vec<(u32, usize)>, // (field_id, offset) - field_id_to_index: HashMap, // (field_id, index to `fields`) + fields: Vec<(u32, usize)>, // (field_id, offset) + fields_index_by_field_id: IndexSet, buffer: ValueBuffer, /// Is there a pending list or object that needs to be finalized? pending: Option<(&'b str, usize)>, @@ -577,24 +577,19 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { parent_buffer, metadata_builder, fields: Vec::new(), - field_id_to_index: HashMap::new(), + fields_index_by_field_id: IndexSet::new(), buffer: ValueBuffer::default(), pending: None, } } fn upsert_field(&mut self, field_id: u32, field_start: usize) { - use std::collections::hash_map::Entry; + let (i, new_entry) = self.fields_index_by_field_id.insert_full(field_id); - match self.field_id_to_index.entry(field_id) { - Entry::Occupied(occupied_entry) => { - let i = *occupied_entry.get(); - self.fields[i] = (field_id, field_start); - } - Entry::Vacant(vacant_entry) => { - vacant_entry.insert(self.fields.len()); - self.fields.push((field_id, field_start)); - } + if new_entry { + self.fields.push((field_id, field_start)); + } else { + self.fields[i] = (field_id, field_start); } } From 8658805239719754b7de9babfce3a0de5b950b11 Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Sun, 29 Jun 2025 20:51:42 -0400 Subject: [PATCH 6/7] Use seedable range --- parquet-variant/benches/variant_builder.rs | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/parquet-variant/benches/variant_builder.rs b/parquet-variant/benches/variant_builder.rs index cce5fb076fdc..432c4192e3d0 100644 --- a/parquet-variant/benches/variant_builder.rs +++ b/parquet-variant/benches/variant_builder.rs @@ -22,17 +22,17 @@ use criterion::*; use parquet_variant::VariantBuilder; use rand::{ distr::{uniform::SampleUniform, Alphanumeric}, - rngs::ThreadRng, - Rng, + rngs::StdRng, + Rng, SeedableRng, }; use std::{hint, ops::Range}; -fn random(rng: &mut ThreadRng, range: Range) -> T { +fn random(rng: &mut StdRng, range: Range) -> T { rng.random_range::(range) } // generates a string with a 50/50 chance whether it's a short or a long string -fn random_string(rng: &mut ThreadRng) -> String { +fn random_string(rng: &mut StdRng) -> String { let len = rng.random_range::(1..128); rng.sample_iter(&Alphanumeric) @@ -47,7 +47,7 @@ struct RandomStringGenerator { } impl RandomStringGenerator { - pub fn new(rng: &mut ThreadRng, capacity: usize) -> Self { + pub fn new(rng: &mut StdRng, capacity: usize) -> Self { let table = (0..capacity) .map(|_| random_string(rng)) .collect::>(); @@ -67,7 +67,7 @@ impl RandomStringGenerator { // Creates an object with field names inserted in reverse lexicographical order fn bench_object_field_names_reverse_order(c: &mut Criterion) { c.bench_function("bench_object_field_names_reverse_order", |b| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 117); b.iter(|| { let mut variant = VariantBuilder::new(); @@ -94,7 +94,7 @@ fn bench_object_field_names_reverse_order(c: &mut Criterion) { } */ fn bench_object_same_schema(c: &mut Criterion) { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 117); c.bench_function("bench_object_same_schema", |b| { @@ -133,7 +133,7 @@ fn bench_object_same_schema(c: &mut Criterion) { */ fn bench_object_list_same_schema(c: &mut Criterion) { c.bench_function("bench_object_list_same_schema", |b| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 101); b.iter(|| { @@ -167,7 +167,7 @@ fn bench_object_list_same_schema(c: &mut Criterion) { // values are randomly generated, with an equal distribution to whether it's a String, Object, or List fn bench_object_unknown_schema(c: &mut Criterion) { c.bench_function("bench_object_unknown_schema", |b| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 1001); b.iter(|| { @@ -213,11 +213,11 @@ fn bench_object_unknown_schema(c: &mut Criterion) { // values are randomly generated, with an equal distribution to whether it's a String, Object, or List fn bench_object_list_unknown_schema(c: &mut Criterion) { c.bench_function("bench_object_list_unknown_schema", |b| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 1001); b.iter(|| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut variant = VariantBuilder::new(); @@ -279,11 +279,11 @@ fn bench_object_list_unknown_schema(c: &mut Criterion) { */ fn bench_object_partially_same_schema(c: &mut Criterion) { c.bench_function("bench_object_partially_same_schema", |b| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 117); b.iter(|| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); for _ in 0..200 { let mut variant = VariantBuilder::new(); @@ -340,7 +340,7 @@ fn bench_object_partially_same_schema(c: &mut Criterion) { */ fn bench_object_list_partially_same_schema(c: &mut Criterion) { c.bench_function("bench_object_list_partially_same_schema", |b| { - let mut rng = rand::rng(); + let mut rng = StdRng::seed_from_u64(42); let mut string_table = RandomStringGenerator::new(&mut rng, 117); b.iter(|| { From cd1094595dbefe7158ca6e5d519a7d7dd47c2909 Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Mon, 30 Jun 2025 14:38:28 -0400 Subject: [PATCH 7/7] Use index map for ObjectBuilder fields --- parquet-variant/src/builder.rs | 40 ++++++++++------------------------ 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index a9000edbbb3f..f0f32371475c 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -16,7 +16,7 @@ // under the License. use crate::decoder::{VariantBasicType, VariantPrimitiveType}; use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; -use indexmap::IndexSet; +use indexmap::{IndexMap, IndexSet}; const BASIC_TYPE_BITS: u8 = 2; const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); @@ -564,8 +564,7 @@ impl<'a> ListBuilder<'a> { pub struct ObjectBuilder<'a, 'b> { parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder, - fields: Vec<(u32, usize)>, // (field_id, offset) - fields_index_by_field_id: IndexSet, + fields: IndexMap, // (field_id, offset) buffer: ValueBuffer, /// Is there a pending list or object that needs to be finalized? pending: Option<(&'b str, usize)>, @@ -576,30 +575,19 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { Self { parent_buffer, metadata_builder, - fields: Vec::new(), - fields_index_by_field_id: IndexSet::new(), + fields: IndexMap::new(), buffer: ValueBuffer::default(), pending: None, } } - fn upsert_field(&mut self, field_id: u32, field_start: usize) { - let (i, new_entry) = self.fields_index_by_field_id.insert_full(field_id); - - if new_entry { - self.fields.push((field_id, field_start)); - } else { - self.fields[i] = (field_id, field_start); - } - } - fn check_pending_field(&mut self) { - let Some((field_name, field_start)) = self.pending.as_ref() else { + let Some(&(field_name, field_start)) = self.pending.as_ref() else { return; }; let field_id = self.metadata_builder.upsert_field_name(field_name); - self.upsert_field(field_id, *field_start); + self.fields.insert(field_id, field_start); self.pending = None; } @@ -614,7 +602,7 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let field_id = self.metadata_builder.upsert_field_name(key); let field_start = self.buffer.offset(); - self.upsert_field(field_id, field_start); + self.fields.insert(field_id, field_start); self.buffer.append_non_nested_value(value); } @@ -652,13 +640,13 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let num_fields = self.fields.len(); let is_large = num_fields > u8::MAX as usize; - self.fields.sort_by(|a, b| { - let key_a = &self.metadata_builder.field_name(a.0 as usize); - let key_b = &self.metadata_builder.field_name(b.0 as usize); + self.fields.sort_by(|&field_a_id, _, &field_b_id, _| { + let key_a = &self.metadata_builder.field_name(field_a_id as usize); + let key_b = &self.metadata_builder.field_name(field_b_id as usize); key_a.cmp(key_b) }); - let max_id = self.fields.iter().map(|&(id, _)| id).max().unwrap_or(0); + let max_id = self.fields.iter().map(|(i, _)| *i).max().unwrap_or(0); let id_size = int_size(max_id as usize); let offset_size = int_size(data_size); @@ -672,12 +660,12 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { ); // Write field IDs (sorted order) - for &(id, _) in &self.fields { + for (&id, _) in &self.fields { write_offset(self.parent_buffer.inner_mut(), id as usize, id_size); } // Write field offsets - for &(_, offset) in &self.fields { + for (_, &offset) in &self.fields { write_offset(self.parent_buffer.inner_mut(), offset, offset_size); } @@ -1200,10 +1188,6 @@ mod tests { } outer_object_builder.insert("b", false); - - // note, we can't guarantee an Objects field is sorted by field id. - assert_eq!(outer_object_builder.fields, vec![(1, 0), (0, 10)]); - outer_object_builder.finish(); }