Skip to content
Merged
10 changes: 5 additions & 5 deletions parquet-variant-compute/benches/variant_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use arrow::util::test_util::seedable_rng;
use criterion::{criterion_group, criterion_main, Criterion};
use parquet_variant::{Variant, VariantBuilder};
use parquet_variant_compute::variant_get::{variant_get, GetOptions};
use parquet_variant_compute::{batch_json_string_to_variant, VariantArray, VariantArrayBuilder};
use parquet_variant_compute::{json_to_variant, VariantArray, VariantArrayBuilder};
use rand::distr::Alphanumeric;
use rand::rngs::StdRng;
use rand::Rng;
Expand All @@ -34,7 +34,7 @@ fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
"batch_json_string_to_variant repeated_struct 8k string",
|b| {
b.iter(|| {
let _ = batch_json_string_to_variant(&array_ref).unwrap();
let _ = json_to_variant(&array_ref).unwrap();
});
},
);
Expand All @@ -43,7 +43,7 @@ fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
let array_ref: ArrayRef = Arc::new(input_array);
c.bench_function("batch_json_string_to_variant json_list 8k string", |b| {
b.iter(|| {
let _ = batch_json_string_to_variant(&array_ref).unwrap();
let _ = json_to_variant(&array_ref).unwrap();
});
});

Expand All @@ -60,7 +60,7 @@ fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
let array_ref: ArrayRef = Arc::new(input_array);
c.bench_function(&id, |b| {
b.iter(|| {
let _ = batch_json_string_to_variant(&array_ref).unwrap();
let _ = json_to_variant(&array_ref).unwrap();
});
});

Expand All @@ -77,7 +77,7 @@ fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
let array_ref: ArrayRef = Arc::new(input_array);
c.bench_function(&id, |b| {
b.iter(|| {
let _ = batch_json_string_to_variant(&array_ref).unwrap();
let _ = json_to_variant(&array_ref).unwrap();
});
});
}
Expand Down
20 changes: 10 additions & 10 deletions parquet-variant-compute/src/from_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
use crate::{VariantArray, VariantArrayBuilder};
use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
use arrow_schema::ArrowError;
use parquet_variant_json::json_to_variant;
use parquet_variant_json::JsonToVariant;

/// Macro to convert string array to variant array
macro_rules! string_array_to_variant {
Expand All @@ -31,7 +31,7 @@ macro_rules! string_array_to_variant {
$builder.append_null();
} else {
let mut vb = $builder.variant_builder();
json_to_variant($array.value(i), &mut vb)?;
vb.append_json($array.value(i))?;
vb.finish()
}
}
Expand All @@ -46,7 +46,7 @@ macro_rules! string_array_to_variant {
/// - [`StringArray`]
/// - [`LargeStringArray`]
/// - [`StringViewArray`]
pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
pub fn json_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
let mut variant_array_builder = VariantArrayBuilder::new(input.len());

// Try each string array type in sequence
Expand All @@ -68,14 +68,14 @@ pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result<VariantArray, Ar

#[cfg(test)]
mod test {
use crate::batch_json_string_to_variant;
use crate::json_to_variant;
use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
use arrow_schema::ArrowError;
use parquet_variant::{Variant, VariantBuilder};
use std::sync::Arc;

#[test]
fn test_batch_json_string_to_variant() -> Result<(), ArrowError> {
fn test_json_to_variant() -> Result<(), ArrowError> {
let input = StringArray::from(vec![
Some("1"),
None,
Expand All @@ -84,7 +84,7 @@ mod test {
None,
]);
let array_ref: ArrayRef = Arc::new(input);
let variant_array = batch_json_string_to_variant(&array_ref).unwrap();
let variant_array = json_to_variant(&array_ref).unwrap();

let metadata_array = variant_array.metadata_field();
let value_array = variant_array.value_field().expect("value field");
Expand Down Expand Up @@ -124,7 +124,7 @@ mod test {
}

#[test]
fn test_batch_json_string_to_variant_large_string() -> Result<(), ArrowError> {
fn test_json_to_variant_large_string() -> Result<(), ArrowError> {
let input = LargeStringArray::from(vec![
Some("1"),
None,
Expand All @@ -133,7 +133,7 @@ mod test {
None,
]);
let array_ref: ArrayRef = Arc::new(input);
let variant_array = batch_json_string_to_variant(&array_ref).unwrap();
let variant_array = json_to_variant(&array_ref).unwrap();

let metadata_array = variant_array.metadata_field();
let value_array = variant_array.value_field().expect("value field");
Expand Down Expand Up @@ -173,7 +173,7 @@ mod test {
}

#[test]
fn test_batch_json_string_to_variant_string_view() -> Result<(), ArrowError> {
fn test_json_to_variant_string_view() -> Result<(), ArrowError> {
let input = StringViewArray::from(vec![
Some("1"),
None,
Expand All @@ -182,7 +182,7 @@ mod test {
None,
]);
let array_ref: ArrayRef = Arc::new(input);
let variant_array = batch_json_string_to_variant(&array_ref).unwrap();
let variant_array = json_to_variant(&array_ref).unwrap();

let metadata_array = variant_array.metadata_field();
let value_array = variant_array.value_field().expect("value field");
Expand Down
8 changes: 4 additions & 4 deletions parquet-variant-compute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
//! ## Main APIs
//! - [`VariantArray`] : Represents an array of `Variant` values.
//! - [`VariantArrayBuilder`]: For building [`VariantArray`]
//! - [`batch_json_string_to_variant`]: Function to convert a batch of JSON strings to a `VariantArray`.
//! - [`batch_variant_to_json_string`]: Function to convert a `VariantArray` to a batch of JSON strings.
//! - [`json_to_variant`]: Function to convert a batch of JSON strings to a `VariantArray`.
//! - [`variant_to_json`]: Function to convert a `VariantArray` to a batch of JSON strings.
//! - [`cast_to_variant`]: Module to cast other Arrow arrays to `VariantArray`.
//! - [`variant_get`]: Module to get values from a `VariantArray` using a specified [`VariantPath`]
//!
Expand All @@ -45,5 +45,5 @@ pub mod variant_get;
pub use variant_array::{ShreddingState, VariantArray};
pub use variant_array_builder::{VariantArrayBuilder, VariantArrayVariantBuilder};

pub use from_json::batch_json_string_to_variant;
pub use to_json::batch_variant_to_json_string;
pub use from_json::json_to_variant;
pub use to_json::variant_to_json;
12 changes: 6 additions & 6 deletions parquet-variant-compute/src/to_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow::datatypes::DataType;
use arrow_schema::ArrowError;
use parquet_variant::Variant;
use parquet_variant_json::variant_to_json;
use parquet_variant_json::VariantToJson;

/// Transform a batch of Variant represented as STRUCT<metadata: BINARY, value: BINARY> to a batch
/// of JSON strings where nulls are preserved. The Variant values in the input must be valid.
pub fn batch_variant_to_json_string(input: &ArrayRef) -> Result<StringArray, ArrowError> {
pub fn variant_to_json(input: &ArrayRef) -> Result<StringArray, ArrowError> {
let struct_array = input
.as_any()
.downcast_ref::<StructArray>()
Expand Down Expand Up @@ -83,7 +83,7 @@ pub fn batch_variant_to_json_string(input: &ArrayRef) -> Result<StringArray, Arr
let value = value_array.value(i);
let variant = Variant::new(metadata, value);
let start_len = json_buffer.len();
variant_to_json(&mut json_buffer, &variant)?;
variant.to_json(&mut json_buffer)?;
let written = (json_buffer.len() - start_len) as i32;
current_offset += written;
offsets.push(current_offset);
Expand All @@ -104,7 +104,7 @@ pub fn batch_variant_to_json_string(input: &ArrayRef) -> Result<StringArray, Arr

#[cfg(test)]
mod test {
use crate::batch_variant_to_json_string;
use crate::variant_to_json;
use arrow::array::{Array, ArrayRef, BinaryBuilder, BooleanBufferBuilder, StructArray};
use arrow::buffer::NullBuffer;
use arrow::datatypes::DataType;
Expand All @@ -113,7 +113,7 @@ mod test {
use std::sync::Arc;

#[test]
fn test_batch_variant_to_json_string() {
fn test_variant_to_json() {
let mut metadata_builder = BinaryBuilder::new();
let mut value_builder = BinaryBuilder::new();

Expand Down Expand Up @@ -161,7 +161,7 @@ mod test {

let input = Arc::new(struct_array) as ArrayRef;

let result = batch_variant_to_json_string(&input).unwrap();
let result = variant_to_json(&input).unwrap();

// Expected output: ["0", null, "{\"a\":32}", "null", null]
let expected = vec![Some("0"), None, Some("{\"a\":32}"), Some("null"), None];
Expand Down
6 changes: 3 additions & 3 deletions parquet-variant-compute/src/variant_get/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ mod test {
use arrow_schema::{DataType, Field, FieldRef, Fields};
use parquet_variant::{Variant, VariantPath};

use crate::batch_json_string_to_variant;
use crate::json_to_variant;
use crate::VariantArray;

use super::{variant_get, GetOptions};
Expand All @@ -121,14 +121,14 @@ mod test {
// Create input array from JSON string
let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
let input_variant_array_ref: ArrayRef =
Arc::new(batch_json_string_to_variant(&input_array_ref).unwrap());
Arc::new(json_to_variant(&input_array_ref).unwrap());

let result =
variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();

// Create expected array from JSON string
let expected_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
let expected_variant_array = batch_json_string_to_variant(&expected_array_ref).unwrap();
let expected_variant_array = json_to_variant(&expected_array_ref).unwrap();

let result_array: &VariantArray = result.as_any().downcast_ref().unwrap();
assert_eq!(
Expand Down
49 changes: 23 additions & 26 deletions parquet-variant-json/src/from_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,14 @@ use arrow_schema::ArrowError;
use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilderExt};
use serde_json::{Number, Value};

/// Converts a JSON string to Variant to a [`VariantBuilderExt`], such as
/// Converts a JSON string to Variant using a [`VariantBuilderExt`], such as
/// [`VariantBuilder`].
///
/// The resulting `value` and `metadata` buffers can be
/// extracted using `builder.finish()`
///
/// # Arguments
/// * `json` - The JSON string to parse as Variant.
/// * `variant_builder` - Object of type `VariantBuilder` used to build the variant from the JSON
/// string
///
///
/// # Returns
///
Expand All @@ -42,43 +39,43 @@ use serde_json::{Number, Value};
///
/// ```rust
/// # use parquet_variant::VariantBuilder;
/// # use parquet_variant_json::{
/// # json_to_variant, variant_to_json_string, variant_to_json, variant_to_json_value
/// # };
/// # use parquet_variant_json::{JsonToVariant, VariantToJson};
///
/// let mut variant_builder = VariantBuilder::new();
/// let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string()
/// + "\"email\":\"alice@example.com\", \"is_active\": true, \"score\": 95.7,"
/// + "\"additional_info\": null}";
/// json_to_variant(&person_string, &mut variant_builder)?;
/// variant_builder.append_json(&person_string)?;
///
/// let (metadata, value) = variant_builder.finish();
///
/// let variant = parquet_variant::Variant::try_new(&metadata, &value)?;
///
/// let json_result = variant_to_json_string(&variant)?;
/// let json_value = variant_to_json_value(&variant)?;
/// let json_result = variant.to_json_string()?;
/// let json_value = variant.to_json_value()?;
///
/// let mut buffer = Vec::new();
/// variant_to_json(&mut buffer, &variant)?;
/// variant.to_json(&mut buffer)?;
/// let buffer_result = String::from_utf8(buffer)?;
/// assert_eq!(json_result, "{\"additional_info\":null,\"age\":30,".to_string() +
/// "\"email\":\"alice@example.com\",\"is_active\":true,\"name\":\"Alice\",\"score\":95.7}");
/// assert_eq!(json_result, buffer_result);
/// assert_eq!(json_result, serde_json::to_string(&json_value)?);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn json_to_variant(json: &str, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
let json: Value = serde_json::from_str(json)
.map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;

build_json(&json, builder)?;
Ok(())
pub trait JsonToVariant {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is quite clever

/// Create a Variant from a JSON string
fn append_json(&mut self, json: &str) -> Result<(), ArrowError>;
}

fn build_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed redundant build_json

append_json(json, builder)?;
Ok(())
/// Blanket implementation so that every [`VariantBuilderExt`] can ingest JSON text directly.
impl<T: VariantBuilderExt> JsonToVariant for T {
    /// Parses `json` and appends the parsed value to this builder.
    ///
    /// # Errors
    ///
    /// Returns [`ArrowError::InvalidArgumentError`] (message prefixed with
    /// `JSON format error:`) when `json` cannot be parsed. Any error raised while
    /// appending the parsed value is propagated to the caller.
    fn append_json(&mut self, json: &str) -> Result<(), ArrowError> {
        let parsed: Value = match serde_json::from_str(json) {
            Ok(value) => value,
            Err(e) => {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "JSON format error: {e}"
                )))
            }
        };

        // This resolves to the module-level `append_json` helper, not to this trait method.
        append_json(&parsed, self)?;
        Ok(())
    }
}

fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
Expand Down Expand Up @@ -157,7 +154,7 @@ impl VariantBuilderExt for ObjectFieldBuilder<'_, '_, '_> {
#[cfg(test)]
mod test {
use super::*;
use crate::variant_to_json_string;
use crate::VariantToJson;
use arrow_schema::ArrowError;
use parquet_variant::{
ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
Expand All @@ -171,7 +168,7 @@ mod test {
impl JsonToVariantTest<'_> {
fn run(self) -> Result<(), ArrowError> {
let mut variant_builder = VariantBuilder::new();
json_to_variant(self.json, &mut variant_builder)?;
variant_builder.append_json(self.json)?;
let (metadata, value) = variant_builder.finish();
let variant = Variant::try_new(&metadata, &value)?;
assert_eq!(variant, self.expected);
Expand Down Expand Up @@ -622,10 +619,10 @@ mod test {
);
// Manually verify raw JSON value size
let mut variant_builder = VariantBuilder::new();
json_to_variant(&json, &mut variant_builder)?;
variant_builder.append_json(&json)?;
let (metadata, value) = variant_builder.finish();
let v = Variant::try_new(&metadata, &value)?;
let output_string = variant_to_json_string(&v)?;
let output_string = v.to_json_string()?;
assert_eq!(output_string, json);
// Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496
assert_eq!(metadata.len(), 2485);
Expand Down Expand Up @@ -663,10 +660,10 @@ mod test {
fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
let json = "{\"爱\":\"अ\",\"a\":1}";
let mut variant_builder = VariantBuilder::new();
json_to_variant(json, &mut variant_builder)?;
variant_builder.append_json(json)?;
let (metadata, value) = variant_builder.finish();
let v = Variant::try_new(&metadata, &value)?;
let output_string = variant_to_json_string(&v)?;
let output_string = v.to_json_string()?;
assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}");
let mut variant_builder = VariantBuilder::new();
let mut object_builder = variant_builder.new_object();
Expand Down
8 changes: 4 additions & 4 deletions parquet-variant-json/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
//! [Apache Parquet]: https://parquet.apache.org/
//!
//! * See [`json_to_variant`] for converting a JSON string to a Variant.
//! * See [`variant_to_json`] for converting a Variant to a JSON string.
//! * See [`JsonToVariant`] trait for converting a JSON string to a Variant.
//! * See [`VariantToJson`] trait for converting a Variant to a JSON string.
//!
//! ## 🚧 Work In Progress
//!
Expand All @@ -34,5 +34,5 @@
mod from_json;
mod to_json;

pub use from_json::json_to_variant;
pub use to_json::{variant_to_json, variant_to_json_string, variant_to_json_value};
pub use from_json::JsonToVariant;
pub use to_json::VariantToJson;
Loading
Loading