Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 76 additions & 11 deletions parquet-variant/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -597,29 +597,31 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> {
}
}

fn check_pending_field(&mut self) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there is some sort of RAAI mechanism we can use to ensure that the pending field is completed automatically when the child ListBuilder or ObjectBuilder is dropped

That way the compiler can ensure this type of bug is not possible

Something like

/// If this is a builder for a nested object or list, on `Drop` this object will finish the
/// in progress field for the parent
enum PendingParent {
...
}

impl Drop for PendingParent {
 List(...),
Object {
  field_name: &str,
  offset: usize
 }
}

Then we could create a field on the builder like this

struct ObjectBuilder {
...
  pending: Option<PendingParent>,
}

I think the biggest challenge would be sorting out the type lifetimes

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, I can take a stab at this

Copy link
Copy Markdown
Contributor

@alamb alamb Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let Some((field_name, field_start)) = self.pending.as_ref() else {
return;
};

let field_id = self.metadata_builder.add_field_name(field_name);
self.fields.insert(field_id, *field_start);

self.pending = None;
}

/// Add a field with key and value to the object
///
/// Note: when inserting duplicate keys, the new value overwrites the previous mapping,
/// but the old value remains in the buffer, resulting in a larger variant
pub fn insert<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, key: &str, value: T) {
self.check_pending_field();

let field_id = self.metadata_builder.add_field_name(key);
let field_start = self.buffer.offset();

self.fields.insert(field_id, field_start);
self.buffer.append_non_nested_value(value);
}

fn check_pending_field(&mut self) {
let Some((field_name, field_start)) = self.pending.as_ref() else {
return;
};

let field_id = self.metadata_builder.add_field_name(field_name);
self.fields.insert(field_id, *field_start);

self.pending = None;
}

/// Return a new [`ObjectBuilder`] to add a nested object with the specified
/// key to the object.
pub fn new_object(&mut self, key: &'b str) -> ObjectBuilder {
Expand Down Expand Up @@ -1364,4 +1366,67 @@ mod tests {
assert_eq!(items_list.get(0).unwrap(), Variant::from("apple"));
assert_eq!(items_list.get(1).unwrap(), Variant::from(false));
}

#[test]
fn test_nested_object_with_heterogeneous_fields() {
/*
{
"a": false,
"c": {
"b": "a"
}
"b": true,
}
*/

let mut builder = VariantBuilder::new();
{
let mut outer_object_builder = builder.new_object();

outer_object_builder.insert("a", false);

{
let mut inner_object_builder = outer_object_builder.new_object("c");
inner_object_builder.insert("b", "a");
inner_object_builder.finish();
}

outer_object_builder.insert("b", true);

outer_object_builder.finish();
}

let (metadata, value) = builder.finish();

// note, object fields are now sorted lexigraphically by field name
/*
{
"a": false,
"b": true,
"c": {
"b": "a"
}
}
*/

let variant = Variant::try_new(&metadata, &value).unwrap();
let outer_object = variant.as_object().unwrap();

assert_eq!(outer_object.len(), 3);

assert_eq!(outer_object.field_name(0).unwrap(), "a");
assert_eq!(outer_object.field(0).unwrap(), Variant::from(false));

assert_eq!(outer_object.field_name(2).unwrap(), "c");

let inner_object_variant = outer_object.field(2).unwrap();
let inner_object = inner_object_variant.as_object().unwrap();

assert_eq!(inner_object.len(), 1);
assert_eq!(inner_object.field_name(0).unwrap(), "b");
assert_eq!(inner_object.field(0).unwrap(), Variant::from("a"));

assert_eq!(outer_object.field_name(1).unwrap(), "b");
assert_eq!(outer_object.field(1).unwrap(), Variant::from(true));
}
}
Loading