diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index 1c6ebe23d24f..43b8e59bce5e 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -72,7 +72,183 @@ fn make_room_for_header(buffer: &mut Vec, start_pos: usize, header_size: usi buffer.copy_within(src_start..src_end, dst_start); } -/// Builder for [`Variant`] values +#[derive(Default)] +struct ValueBuffer(Vec); + +impl ValueBuffer { + fn append_null(&mut self) { + self.0.push(primitive_header(VariantPrimitiveType::Null)); + } + + fn append_bool(&mut self, value: bool) { + let primitive_type = if value { + VariantPrimitiveType::BooleanTrue + } else { + VariantPrimitiveType::BooleanFalse + }; + self.0.push(primitive_header(primitive_type)); + } + + fn append_int8(&mut self, value: i8) { + self.0.push(primitive_header(VariantPrimitiveType::Int8)); + self.0.push(value as u8); + } + + fn append_int16(&mut self, value: i16) { + self.0.push(primitive_header(VariantPrimitiveType::Int16)); + self.0.extend_from_slice(&value.to_le_bytes()); + } + + fn append_int32(&mut self, value: i32) { + self.0.push(primitive_header(VariantPrimitiveType::Int32)); + self.0.extend_from_slice(&value.to_le_bytes()); + } + + fn append_int64(&mut self, value: i64) { + self.0.push(primitive_header(VariantPrimitiveType::Int64)); + self.0.extend_from_slice(&value.to_le_bytes()); + } + + fn append_float(&mut self, value: f32) { + self.0.push(primitive_header(VariantPrimitiveType::Float)); + self.0.extend_from_slice(&value.to_le_bytes()); + } + + fn append_double(&mut self, value: f64) { + self.0.push(primitive_header(VariantPrimitiveType::Double)); + self.0.extend_from_slice(&value.to_le_bytes()); + } + + fn append_date(&mut self, value: chrono::NaiveDate) { + self.0.push(primitive_header(VariantPrimitiveType::Date)); + let days_since_epoch = value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32; + self.0.extend_from_slice(&days_since_epoch.to_le_bytes()); + } + + fn 
append_timestamp_micros(&mut self, value: chrono::DateTime<chrono::Utc>) { + self.0 + .push(primitive_header(VariantPrimitiveType::TimestampMicros)); + let micros = value.timestamp_micros(); + self.0.extend_from_slice(&micros.to_le_bytes()); + } + + fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) { + self.0 + .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros)); + let micros = value.and_utc().timestamp_micros(); + self.0.extend_from_slice(&micros.to_le_bytes()); + } + + fn append_decimal4(&mut self, integer: i32, scale: u8) { + self.0 + .push(primitive_header(VariantPrimitiveType::Decimal4)); + self.0.push(scale); + self.0.extend_from_slice(&integer.to_le_bytes()); + } + + fn append_decimal8(&mut self, integer: i64, scale: u8) { + self.0 + .push(primitive_header(VariantPrimitiveType::Decimal8)); + self.0.push(scale); + self.0.extend_from_slice(&integer.to_le_bytes()); + } + + fn append_decimal16(&mut self, integer: i128, scale: u8) { + self.0 + .push(primitive_header(VariantPrimitiveType::Decimal16)); + self.0.push(scale); + self.0.extend_from_slice(&integer.to_le_bytes()); + } + + fn append_binary(&mut self, value: &[u8]) { + self.0.push(primitive_header(VariantPrimitiveType::Binary)); + self.0 + .extend_from_slice(&(value.len() as u32).to_le_bytes()); + self.0.extend_from_slice(value); + } + + fn append_short_string(&mut self, value: ShortString) { + let inner = value.0; + self.0.push(short_string_header(inner.len())); + self.0.extend_from_slice(inner.as_bytes()); + } + + fn append_string(&mut self, value: &str) { + self.0.push(primitive_header(VariantPrimitiveType::String)); + self.0 + .extend_from_slice(&(value.len() as u32).to_le_bytes()); + self.0.extend_from_slice(value.as_bytes()); + } + + fn offset(&self) -> usize { + self.0.len() + } + + fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) { + let variant = value.into(); + match variant { + Variant::Null => self.append_null(), + Variant::BooleanTrue => self.append_bool(true), + 
Variant::BooleanFalse => self.append_bool(false), + Variant::Int8(v) => self.append_int8(v), + Variant::Int16(v) => self.append_int16(v), + Variant::Int32(v) => self.append_int32(v), + Variant::Int64(v) => self.append_int64(v), + Variant::Date(v) => self.append_date(v), + Variant::TimestampMicros(v) => self.append_timestamp_micros(v), + Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v), + Variant::Decimal4(VariantDecimal4 { integer, scale }) => { + self.append_decimal4(integer, scale) + } + Variant::Decimal8(VariantDecimal8 { integer, scale }) => { + self.append_decimal8(integer, scale) + } + Variant::Decimal16(VariantDecimal16 { integer, scale }) => { + self.append_decimal16(integer, scale) + } + Variant::Float(v) => self.append_float(v), + Variant::Double(v) => self.append_double(v), + Variant::Binary(v) => self.append_binary(v), + Variant::String(s) => self.append_string(s), + Variant::ShortString(s) => self.append_short_string(s), + Variant::Object(_) | Variant::List(_) => { + todo!("How does this work with the redesign?"); + } + } + } +} + +#[derive(Default)] +struct MetadataBuilder { + field_name_to_id: BTreeMap, + field_names: Vec, +} + +impl MetadataBuilder { + /// Add field name to dictionary, return its ID + fn add_field_name(&mut self, field_name: &str) -> u32 { + use std::collections::btree_map::Entry; + match self.field_name_to_id.entry(field_name.to_string()) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let id = self.field_names.len() as u32; + entry.insert(id); + self.field_names.push(field_name.to_string()); + id + } + } + } + + fn num_field_names(&self) -> usize { + self.field_names.len() + } + + fn metadata_size(&self) -> usize { + self.field_names.iter().map(|k| k.len()).sum() + } +} + +/// Top level builder for [`Variant`] values /// /// # Example: create a Primitive Int8 /// ``` @@ -108,9 +284,7 @@ fn make_room_for_header(buffer: &mut Vec, start_pos: usize, header_size: usi /// let (metadata, 
value) = builder.finish(); /// // use the Variant API to verify the result /// let variant = Variant::try_new(&metadata, &value).unwrap(); -/// let Variant::Object(variant_object) = variant else { -/// panic!("unexpected variant type") -/// }; +/// let variant_object = variant.as_object().unwrap(); /// assert_eq!( /// variant_object.field_by_name("first_name").unwrap(), /// Some(Variant::from("Jiaying")) @@ -137,9 +311,7 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: usi /// let (metadata, value) = builder.finish(); /// // use the Variant API to verify the result /// let variant = Variant::try_new(&metadata, &value).unwrap(); -/// let Variant::List(variant_list) = variant else { -/// panic!("unexpected variant type") -/// }; +/// let variant_list = variant.as_list().unwrap(); /// // Verify the list contents /// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1)); /// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2)); @@ -148,189 +320,108 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: usi /// /// # Example: [`Variant::List`] of [`Variant::Object`]s /// -/// THis example shows how to create an list of objects: +/// This example shows how to create a list of objects: /// ```json /// [ -/// { -/// "first_name": "Jiaying", -/// "last_name": "Li" -/// }, /// { -/// "first_name": "Malthe", -/// "last_name": "Karbo" -/// } +/// "id": 1, +/// "type": "Cauliflower" +/// }, +/// { +/// "id": 2, +/// "type": "Beets" +/// } /// ] /// ``` +/// ``` +/// use parquet_variant::{Variant, VariantBuilder}; +/// let mut builder = VariantBuilder::new(); /// -/// TODO +/// // Create a builder that will write elements to the list +/// let mut list_builder = builder.new_list(); /// +/// { +/// let mut object_builder = list_builder.new_object(); +/// object_builder.append_value("id", 1); +/// object_builder.append_value("type", "Cauliflower"); +/// object_builder.finish(); +/// } +/// +/// { +/// let mut 
object_builder = list_builder.new_object(); +/// object_builder.append_value("id", 2); +/// object_builder.append_value("type", "Beets"); +/// object_builder.finish(); +/// } +/// +/// list_builder.finish(); +/// // Finish the builder to get the metadata and value +/// let (metadata, value) = builder.finish(); +/// // use the Variant API to verify the result +/// let variant = Variant::try_new(&metadata, &value).unwrap(); +/// let variant_list = variant.as_list().unwrap(); +/// +/// +/// let obj1_variant = variant_list.get(0).unwrap(); +/// let obj1 = obj1_variant.as_object().unwrap(); +/// assert_eq!( +/// obj1.field_by_name("id").unwrap(), +/// Some(Variant::from(1)) +/// ); +/// assert_eq!( +/// obj1.field_by_name("type").unwrap(), +/// Some(Variant::from("Cauliflower")) +/// ); +/// +/// let obj2_variant = variant_list.get(1).unwrap(); +/// let obj2 = obj2_variant.as_object().unwrap(); +/// +/// assert_eq!( +/// obj2.field_by_name("id").unwrap(), +/// Some(Variant::from(2)) +/// ); +/// assert_eq!( +/// obj2.field_by_name("type").unwrap(), +/// Some(Variant::from("Beets")) +/// ); +/// +/// ``` pub struct VariantBuilder { - buffer: Vec, - dict: BTreeMap, - dict_keys: Vec, + buffer: ValueBuffer, + metadata_builder: MetadataBuilder, } impl VariantBuilder { pub fn new() -> Self { Self { - buffer: Vec::new(), - dict: BTreeMap::new(), - dict_keys: Vec::new(), + buffer: ValueBuffer::default(), + metadata_builder: MetadataBuilder::default(), } } - fn append_null(&mut self) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Null)); - } - - fn append_bool(&mut self, value: bool) { - let primitive_type = if value { - VariantPrimitiveType::BooleanTrue - } else { - VariantPrimitiveType::BooleanFalse - }; - self.buffer.push(primitive_header(primitive_type)); - } - - fn append_int8(&mut self, value: i8) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Int8)); - self.buffer.push(value as u8); - } - - fn append_int16(&mut self, value: i16) { - 
self.buffer - .push(primitive_header(VariantPrimitiveType::Int16)); - self.buffer.extend_from_slice(&value.to_le_bytes()); - } - - fn append_int32(&mut self, value: i32) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Int32)); - self.buffer.extend_from_slice(&value.to_le_bytes()); - } - - fn append_int64(&mut self, value: i64) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Int64)); - self.buffer.extend_from_slice(&value.to_le_bytes()); - } - - fn append_float(&mut self, value: f32) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Float)); - self.buffer.extend_from_slice(&value.to_le_bytes()); - } - - fn append_double(&mut self, value: f64) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Double)); - self.buffer.extend_from_slice(&value.to_le_bytes()); - } - - fn append_date(&mut self, value: chrono::NaiveDate) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Date)); - let days_since_epoch = value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32; - self.buffer - .extend_from_slice(&days_since_epoch.to_le_bytes()); - } - - fn append_timestamp_micros(&mut self, value: chrono::DateTime<chrono::Utc>) { - self.buffer - .push(primitive_header(VariantPrimitiveType::TimestampMicros)); - let micros = value.timestamp_micros(); - self.buffer.extend_from_slice(&micros.to_le_bytes()); - } - - fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) { - self.buffer - .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros)); - let micros = value.and_utc().timestamp_micros(); - self.buffer.extend_from_slice(&micros.to_le_bytes()); - } - - fn append_decimal4(&mut self, integer: i32, scale: u8) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Decimal4)); - self.buffer.push(scale); - self.buffer.extend_from_slice(&integer.to_le_bytes()); - } - - fn append_decimal8(&mut self, integer: i64, scale: u8) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Decimal8)); - 
self.buffer.push(scale); - self.buffer.extend_from_slice(&integer.to_le_bytes()); - } - - fn append_decimal16(&mut self, integer: i128, scale: u8) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Decimal16)); - self.buffer.push(scale); - self.buffer.extend_from_slice(&integer.to_le_bytes()); - } - - fn append_binary(&mut self, value: &[u8]) { - self.buffer - .push(primitive_header(VariantPrimitiveType::Binary)); - self.buffer - .extend_from_slice(&(value.len() as u32).to_le_bytes()); - self.buffer.extend_from_slice(value); - } - - fn append_short_string(&mut self, value: ShortString) { - let inner = value.0; - self.buffer.push(short_string_header(inner.len())); - self.buffer.extend_from_slice(inner.as_bytes()); - } - - fn append_string(&mut self, value: &str) { - self.buffer - .push(primitive_header(VariantPrimitiveType::String)); - self.buffer - .extend_from_slice(&(value.len() as u32).to_le_bytes()); - self.buffer.extend_from_slice(value.as_bytes()); - } - - /// Add key to dictionary, return its ID - fn add_key(&mut self, key: &str) -> u32 { - use std::collections::btree_map::Entry; - match self.dict.entry(key.to_string()) { - Entry::Occupied(entry) => *entry.get(), - Entry::Vacant(entry) => { - let id = self.dict_keys.len() as u32; - entry.insert(id); - self.dict_keys.push(key.to_string()); - id - } - } - } - - fn offset(&self) -> usize { - self.buffer.len() - } - /// Create an [`ListBuilder`] for creating [`Variant::List`] values. /// /// See the examples on [`VariantBuilder`] for usage. pub fn new_list(&mut self) -> ListBuilder { - ListBuilder::new(self) + ListBuilder::new(&mut self.buffer, &mut self.metadata_builder) } /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values. /// /// See the examples on [`VariantBuilder`] for usage. 
pub fn new_object(&mut self) -> ObjectBuilder { - ObjectBuilder::new(self) + ObjectBuilder::new(&mut self.buffer, &mut self.metadata_builder) + } + + pub fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { + self.buffer.append_value(value); } pub fn finish(self) -> (Vec, Vec) { - let nkeys = self.dict_keys.len(); + let nkeys = self.metadata_builder.num_field_names(); // Calculate metadata size - let total_dict_size: usize = self.dict_keys.iter().map(|k| k.len()).sum(); + let total_dict_size: usize = self.metadata_builder.metadata_size(); // Determine appropriate offset size based on the larger of dict size or total string size let max_offset = std::cmp::max(total_dict_size, nkeys); @@ -351,7 +442,7 @@ impl VariantBuilder { // Write offsets and string data let mut cur_offset = 0; - for (i, key) in self.dict_keys.iter().enumerate() { + for (i, key) in self.metadata_builder.field_names.iter().enumerate() { write_offset( &mut metadata[offset_start + i * offset_size as usize..], cur_offset, @@ -368,51 +459,7 @@ impl VariantBuilder { offset_size, ); - (metadata, self.buffer) - } - - pub fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { - let variant = value.into(); - match variant { - Variant::Null => self.append_null(), - Variant::BooleanTrue => self.append_bool(true), - Variant::BooleanFalse => self.append_bool(false), - Variant::Int8(v) => self.append_int8(v), - Variant::Int16(v) => self.append_int16(v), - Variant::Int32(v) => self.append_int32(v), - Variant::Int64(v) => self.append_int64(v), - Variant::Date(v) => self.append_date(v), - Variant::TimestampMicros(v) => self.append_timestamp_micros(v), - Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v), - Variant::Decimal4(VariantDecimal4 { integer, scale }) => { - self.append_decimal4(integer, scale) - } - Variant::Decimal8(VariantDecimal8 { integer, scale }) => { - self.append_decimal8(integer, scale) - } - Variant::Decimal16(VariantDecimal16 { integer, scale }) => { - 
self.append_decimal16(integer, scale) - } - Variant::Float(v) => self.append_float(v), - Variant::Double(v) => self.append_double(v), - Variant::Binary(v) => self.append_binary(v), - Variant::String(s) => self.append_string(s), - Variant::ShortString(s) => self.append_short_string(s), - Variant::Object(obj) => { - let mut obj_builder = self.new_object(); - for (key, value) in obj.iter() { - obj_builder.append_value(key, value); - } - obj_builder.finish(); - } - Variant::List(list) => { - let mut list_builder = self.new_list(); - for value in list.iter() { - list_builder.append_value(value); - } - list_builder.finish(); - } - } + (metadata, self.buffer.0) } } @@ -426,59 +473,101 @@ impl Default for VariantBuilder { /// /// See the examples on [`VariantBuilder`] for usage. pub struct ListBuilder<'a> { - parent: &'a mut VariantBuilder, - start_pos: usize, + parent_buffer: &'a mut ValueBuffer, + metadata_builder: &'a mut MetadataBuilder, offsets: Vec, + buffer: ValueBuffer, + pending: bool, } impl<'a> ListBuilder<'a> { - fn new(parent: &'a mut VariantBuilder) -> Self { - let start_pos = parent.offset(); + fn new(parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder) -> Self { Self { - parent, - start_pos, + parent_buffer, + metadata_builder, offsets: vec![0], + buffer: ValueBuffer::default(), + pending: false, + } + } + + fn check_new_offset(&mut self) { + if !self.pending { + return; } + + let element_end = self.buffer.offset(); + self.offsets.push(element_end); + + self.pending = false; + } + + pub fn new_object(&mut self) -> ObjectBuilder { + self.check_new_offset(); + + let obj_builder = ObjectBuilder::new(&mut self.buffer, self.metadata_builder); + self.pending = true; + + obj_builder + } + + pub fn new_list(&mut self) -> ListBuilder { + self.check_new_offset(); + + let list_builder = ListBuilder::new(&mut self.buffer, self.metadata_builder); + self.pending = true; + + list_builder } pub fn append_value<'m, 'd, T: Into>>(&mut self, value: 
T) { - self.parent.append_value(value); - let element_end = self.parent.offset() - self.start_pos; + self.check_new_offset(); + + self.buffer.append_value(value); + let element_end = self.buffer.offset(); self.offsets.push(element_end); } - pub fn finish(self) { - let data_size = self.parent.offset() - self.start_pos; + pub fn finish(mut self) { + self.check_new_offset(); + + let data_size = self.buffer.offset(); let num_elements = self.offsets.len() - 1; let is_large = num_elements > u8::MAX as usize; let size_bytes = if is_large { 4 } else { 1 }; let offset_size = int_size(data_size); let header_size = 1 + size_bytes + (num_elements + 1) * offset_size as usize; - make_room_for_header(&mut self.parent.buffer, self.start_pos, header_size); + let parent_start_pos = self.parent_buffer.offset(); + + make_room_for_header(&mut self.parent_buffer.0, parent_start_pos, header_size); // Write header - let mut pos = self.start_pos; - self.parent.buffer[pos] = array_header(is_large, offset_size); + let mut pos = parent_start_pos; + self.parent_buffer.0[pos] = array_header(is_large, offset_size); pos += 1; if is_large { - self.parent.buffer[pos..pos + 4].copy_from_slice(&(num_elements as u32).to_le_bytes()); + self.parent_buffer.0[pos..pos + 4] + .copy_from_slice(&(num_elements as u32).to_le_bytes()); pos += 4; } else { - self.parent.buffer[pos] = num_elements as u8; + self.parent_buffer.0[pos] = num_elements as u8; pos += 1; } // Write offsets for offset in &self.offsets { write_offset( - &mut self.parent.buffer[pos..pos + offset_size as usize], + &mut self.parent_buffer.0[pos..pos + offset_size as usize], *offset, offset_size, ); pos += offset_size as usize; } + + // Append values + self.parent_buffer.0.extend_from_slice(&self.buffer.0); } } @@ -486,40 +575,41 @@ impl<'a> ListBuilder<'a> { /// /// See the examples on [`VariantBuilder`] for usage. 
pub struct ObjectBuilder<'a> { - parent: &'a mut VariantBuilder, - start_pos: usize, + parent_buffer: &'a mut ValueBuffer, + metadata_builder: &'a mut MetadataBuilder, fields: BTreeMap, // (field_id, offset) + buffer: ValueBuffer, } impl<'a> ObjectBuilder<'a> { - fn new(parent: &'a mut VariantBuilder) -> Self { - let start_pos = parent.offset(); + fn new(parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder) -> Self { Self { - parent, - start_pos, + parent_buffer, + metadata_builder, fields: BTreeMap::new(), + buffer: ValueBuffer::default(), } } /// Add a field with key and value to the object pub fn append_value<'m, 'd, T: Into>>(&mut self, key: &str, value: T) { - let id = self.parent.add_key(key); - let field_start = self.parent.offset() - self.start_pos; - self.parent.append_value(value); - let res = self.fields.insert(id, field_start); + let field_id = self.metadata_builder.add_field_name(key); + let field_start = self.buffer.offset(); + self.buffer.append_value(value); + let res = self.fields.insert(field_id, field_start); debug_assert!(res.is_none()); } /// Finalize object with sorted fields pub fn finish(self) { - let data_size = self.parent.offset() - self.start_pos; + let data_size = self.buffer.offset(); let num_fields = self.fields.len(); let is_large = num_fields > u8::MAX as usize; let size_bytes = if is_large { 4 } else { 1 }; let field_ids_by_sorted_field_name = self - .parent - .dict + .metadata_builder + .field_name_to_id .iter() .filter_map(|(_, id)| self.fields.contains_key(id).then_some(*id)) .collect::>(); @@ -534,25 +624,27 @@ impl<'a> ObjectBuilder<'a> { + num_fields * id_size as usize + (num_fields + 1) * offset_size as usize; - make_room_for_header(&mut self.parent.buffer, self.start_pos, header_size); + let parent_start_pos = self.parent_buffer.offset(); + + make_room_for_header(&mut self.parent_buffer.0, parent_start_pos, header_size); // Write header - let mut pos = self.start_pos; - self.parent.buffer[pos] = 
object_header(is_large, id_size, offset_size); + let mut pos = parent_start_pos; + self.parent_buffer.0[pos] = object_header(is_large, id_size, offset_size); pos += 1; if is_large { - self.parent.buffer[pos..pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes()); + self.parent_buffer.0[pos..pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes()); pos += 4; } else { - self.parent.buffer[pos] = num_fields as u8; + self.parent_buffer.0[pos] = num_fields as u8; pos += 1; } // Write field IDs (sorted order) for id in &field_ids_by_sorted_field_name { write_offset( - &mut self.parent.buffer[pos..pos + id_size as usize], + &mut self.parent_buffer.0[pos..pos + id_size as usize], *id as usize, id_size, ); @@ -563,17 +655,19 @@ impl<'a> ObjectBuilder<'a> { for id in &field_ids_by_sorted_field_name { let &offset = self.fields.get(id).unwrap(); write_offset( - &mut self.parent.buffer[pos..pos + offset_size as usize], + &mut self.parent_buffer.0[pos..pos + offset_size as usize], offset, offset_size, ); pos += offset_size as usize; } write_offset( - &mut self.parent.buffer[pos..pos + offset_size as usize], + &mut self.parent_buffer.0[pos..pos + offset_size as usize], data_size, offset_size, ); + + self.parent_buffer.0.extend_from_slice(&self.buffer.0); } } @@ -773,10 +867,9 @@ mod tests { assert_eq!(fields_map, vec![0, 1]); // dict is ordered by field names - // NOTE: when we support nested objects, we'll want to perform a filter by fields_map field ids let dict_metadata = obj - .parent - .dict + .metadata_builder + .field_name_to_id .iter() .map(|(f, i)| (f.as_str(), *i)) .collect::>(); @@ -785,8 +878,8 @@ mod tests { // dict_keys is ordered by insertion order (field id) let dict_keys = obj - .parent - .dict_keys + .metadata_builder + .field_names .iter() .map(|k| k.as_str()) .collect::>(); @@ -801,10 +894,9 @@ mod tests { assert_eq!(fields_map, vec![0, 1, 2]); // dict is ordered by field names - // NOTE: when we support nested objects, we'll want to perform a filter 
by fields_map field ids let dict_metadata = obj - .parent - .dict + .metadata_builder + .field_name_to_id .iter() .map(|(f, i)| (f.as_str(), *i)) .collect::>(); @@ -816,8 +908,8 @@ mod tests { // dict_keys is ordered by insertion order (field id) let dict_keys = obj - .parent - .dict_keys + .metadata_builder + .field_names .iter() .map(|k| k.as_str()) .collect::>(); @@ -830,39 +922,250 @@ mod tests { } #[test] - fn test_append_object() { - let (object_metadata, object_value) = { - let mut builder = VariantBuilder::new(); - let mut obj = builder.new_object(); - obj.append_value("name", "John"); - obj.finish(); - builder.finish() - }; - let object_variant = Variant::try_new(&object_metadata, &object_value).unwrap(); + fn test_nested_list() { + let mut builder = VariantBuilder::new(); + + let mut outer_list_builder = builder.new_list(); + + { + let mut inner_list_builder = outer_list_builder.new_list(); + + inner_list_builder.append_value("a"); + inner_list_builder.append_value("b"); + inner_list_builder.append_value("c"); + inner_list_builder.append_value("d"); + + inner_list_builder.finish(); + } + + outer_list_builder.finish(); + + let (metadata, value) = builder.finish(); + + let variant = Variant::try_new(&metadata, &value).unwrap(); + let outer_list = variant.as_list().unwrap(); + + assert_eq!(outer_list.len(), 1); + + let inner_variant = outer_list.get(0).unwrap(); + let inner_list = inner_variant.as_list().unwrap(); + + assert_eq!( + vec![ + Variant::from("a"), + Variant::from("b"), + Variant::from("c"), + Variant::from("d"), + ], + inner_list.iter().collect::>() + ); + } + + #[test] + fn test_super_nested_list() { + /* + [[[[[1]]]]] + */ let mut builder = VariantBuilder::new(); - builder.append_value(object_variant.clone()); + { + let mut list_builder1 = builder.new_list(); + { + let mut list_builder2 = list_builder1.new_list(); + { + let mut list_builder3 = list_builder2.new_list(); + { + let mut list_builder4 = list_builder3.new_list(); + { + let mut 
list_builder5 = list_builder4.new_list(); + list_builder5.append_value(1); + list_builder5.finish(); + } + list_builder4.finish(); + } + list_builder3.finish(); + } + list_builder2.finish(); + } + list_builder1.finish(); + } + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); - assert_eq!(variant, object_variant); + let list1 = variant.as_list().unwrap(); + assert_eq!(list1.len(), 1); + + let list2_variant = list1.get(0).unwrap(); + let list2 = list2_variant.as_list().unwrap(); + assert_eq!(list2.len(), 1); + + let list3_variant = list2.get(0).unwrap(); + let list3 = list3_variant.as_list().unwrap(); + assert_eq!(list3.len(), 1); + + let list4_variant = list3.get(0).unwrap(); + let list4 = list4_variant.as_list().unwrap(); + assert_eq!(list4.len(), 1); + + let list5_variant = list4.get(0).unwrap(); + let list5 = list5_variant.as_list().unwrap(); + assert_eq!(list5.len(), 1); + + assert_eq!(list5.len(), 1); + + assert_eq!(list5.get(0).unwrap(), Variant::from(1)); } #[test] - fn test_append_list() { - let (list_metadata, list_value) = { - let mut builder = VariantBuilder::new(); - let mut list = builder.new_list(); - list.append_value(1i8); - list.append_value(2i8); - list.finish(); - builder.finish() - }; - let list_variant = Variant::try_new(&list_metadata, &list_value).unwrap(); + fn test_object_list() { + let mut builder = VariantBuilder::new(); + + let mut list_builder = builder.new_list(); + + { + let mut object_builder = list_builder.new_object(); + object_builder.append_value("id", 1); + object_builder.append_value("type", "Cauliflower"); + object_builder.finish(); + } + + { + let mut object_builder = list_builder.new_object(); + object_builder.append_value("id", 2); + object_builder.append_value("type", "Beets"); + object_builder.finish(); + } + + list_builder.finish(); + + let (metadata, value) = builder.finish(); + + let variant = Variant::try_new(&metadata, &value).unwrap(); + let list = 
variant.as_list().unwrap(); + + assert_eq!(list.len(), 2); + + let obj1_variant = list.get(0).unwrap(); + let obj1 = obj1_variant.as_object().unwrap(); + + assert_eq!( + vec![ + ("id", Variant::from(1)), + ("type", Variant::from("Cauliflower")), + ], + obj1.iter().collect::>() + ); + + let obj2_variant = list.get(1).unwrap(); + let obj2 = obj2_variant.as_object().unwrap(); + + assert_eq!( + vec![("id", Variant::from(2)), ("type", Variant::from("Beets")),], + obj2.iter().collect::>() + ); + } + + #[test] + fn test_object_list2() { + let mut builder = VariantBuilder::new(); + + let mut list_builder = builder.new_list(); + + { + let mut object_builder = list_builder.new_object(); + object_builder.append_value("a", 1); + object_builder.finish(); + } + + { + let mut object_builder = list_builder.new_object(); + object_builder.append_value("b", 2); + object_builder.finish(); + } + + list_builder.finish(); + + let (metadata, value) = builder.finish(); + + let variant = Variant::try_new(&metadata, &value).unwrap(); + let list = variant.as_list().unwrap(); + assert_eq!(list.len(), 2); + + let obj1_variant = list.get(0).unwrap(); + let obj1 = obj1_variant.as_object().unwrap(); + assert_eq!( + vec![("a", Variant::from(1)),], + obj1.iter().collect::>() + ); + + let obj2_variant = list.get(1).unwrap(); + let obj2 = obj2_variant.as_object().unwrap(); + assert_eq!( + vec![("b", Variant::from(2)),], + obj2.iter().collect::>() + ); + } + + #[test] + fn test_hetergenous_list() { + /* + [ + 1, + { "a": 1 }, + 2, + { "b": 2}, + 3 + ] + */ let mut builder = VariantBuilder::new(); - builder.append_value(list_variant.clone()); + + let mut list_builder = builder.new_list(); + + list_builder.append_value(1); + + { + let mut object_builder = list_builder.new_object(); + object_builder.append_value("a", 1); + object_builder.finish(); + } + + list_builder.append_value(2); + + { + let mut object_builder = list_builder.new_object(); + object_builder.append_value("b", 2); + 
object_builder.finish(); + } + + list_builder.append_value(3); + + list_builder.finish(); + let (metadata, value) = builder.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); - assert_eq!(variant, list_variant); + let list = variant.as_list().unwrap(); + assert_eq!(list.len(), 5); + assert_eq!(list.get(0).unwrap(), Variant::from(1)); + + let obj1_variant = list.get(1).unwrap(); + let obj1 = obj1_variant.as_object().unwrap(); + assert_eq!( + vec![("a", Variant::from(1)),], + obj1.iter().collect::>() + ); + + assert_eq!(list.get(2).unwrap(), Variant::from(2)); + + let obj2_variant = list.get(3).unwrap(); + let obj2 = obj2_variant.as_object().unwrap(); + assert_eq!( + vec![("b", Variant::from(2)),], + obj2.iter().collect::>() + ); + + assert_eq!(list.get(4).unwrap(), Variant::from(3)); } }