diff --git a/rust/lance-arrow/src/json.rs b/rust/lance-arrow/src/json.rs
index 1b1e4dead0e..0b5e84ae5a8 100644
--- a/rust/lance-arrow/src/json.rs
+++ b/rust/lance-arrow/src/json.rs
@@ -116,8 +116,7 @@ impl JsonArray {
         }

         let jsonb_bytes = self.inner.value(i);
-        decode_json(jsonb_bytes)
-            .map_err(|e| ArrowError::InvalidArgumentError(format!("Failed to decode JSON: {}", e)))
+        Ok(decode_json(jsonb_bytes))
     }

     /// Get the value at index i as raw JSONB bytes
@@ -138,7 +137,7 @@
     }

     /// Convert to Arrow string array (JSON as UTF-8)
-    pub fn to_arrow_json(&self) -> Result<ArrayRef, ArrowError> {
+    pub fn to_arrow_json(&self) -> ArrayRef {
         let mut builder = arrow_array::builder::StringBuilder::new();

         for i in 0..self.len() {
@@ -146,15 +145,13 @@
                 builder.append_null();
             } else {
                 let jsonb_bytes = self.inner.value(i);
-                let json_str = decode_json(jsonb_bytes).map_err(|e| {
-                    ArrowError::InvalidArgumentError(format!("Failed to decode JSON: {}", e))
-                })?;
+                let json_str = decode_json(jsonb_bytes);
                 builder.append_value(&json_str);
             }
         }

         // Return as UTF-8 string array (Arrow represents JSON as strings)
-        Ok(Arc::new(builder.finish()))
+        Arc::new(builder.finish())
     }
 }

@@ -277,23 +274,19 @@ impl TryFrom<ArrayRef> for JsonArray {
     fn try_from(array_ref: ArrayRef) -> Result<Self, Self::Error> {
         match array_ref.data_type() {
             DataType::Utf8 => {
+                // Downcast is guaranteed to succeed after matching on DataType::Utf8
                 let string_array = array_ref
                     .as_any()
                     .downcast_ref::<StringArray>()
-                    .ok_or_else(|| {
-                        ArrowError::InvalidArgumentError("Failed to downcast to StringArray".into())
-                    })?;
+                    .expect("DataType::Utf8 array must be StringArray");
                 Self::try_from(string_array)
             }
             DataType::LargeUtf8 => {
+                // Downcast is guaranteed to succeed after matching on DataType::LargeUtf8
                 let large_string_array = array_ref
                     .as_any()
                     .downcast_ref::<LargeStringArray>()
-                    .ok_or_else(|| {
-                        ArrowError::InvalidArgumentError(
-                            "Failed to downcast to LargeStringArray".into(),
-                        )
-                    })?;
+                    .expect("DataType::LargeUtf8 array must be LargeStringArray");
                 Self::try_from(large_string_array)
             }
             dt => Err(ArrowError::InvalidArgumentError(format!(
@@ -311,9 +304,9 @@ pub fn encode_json(json_str: &str) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
 }

 /// Decode JSONB bytes to JSON string
-pub fn decode_json(jsonb_bytes: &[u8]) -> Result<String, Box<dyn std::error::Error>> {
+pub fn decode_json(jsonb_bytes: &[u8]) -> String {
     let raw_jsonb = jsonb::RawJsonb::new(jsonb_bytes);
-    Ok(raw_jsonb.to_string())
+    raw_jsonb.to_string()
 }

 /// Extract JSONPath value from JSONB
@@ -325,15 +318,11 @@ fn get_json_path(
     let raw_jsonb = jsonb::RawJsonb::new(jsonb_bytes);
     let mut selector = jsonb::jsonpath::Selector::new(raw_jsonb);

-    match selector.select_values(&json_path) {
-        Ok(values) => {
-            if values.is_empty() {
-                Ok(None)
-            } else {
-                Ok(Some(values[0].to_string()))
-            }
-        }
-        Err(e) => Err(Box::new(e)),
+    let values = selector.select_values(&json_path)?;
+    if values.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(values[0].to_string()))
     }
 }

@@ -390,15 +379,11 @@ pub fn convert_lance_json_to_arrow(
             new_columns.push(Arc::new(empty_strings) as ArrayRef);
         } else {
             // Convert JSONB back to JSON strings
+            // Downcast is guaranteed to succeed since is_json_field verified the type
             let binary_array = column
                 .as_any()
                 .downcast_ref::<LargeBinaryArray>()
-                .ok_or_else(|| {
-                    ArrowError::InvalidArgumentError(format!(
-                        "Lance JSON field '{}' has unexpected type",
-                        field.name()
-                    ))
-                })?;
+                .expect("Lance JSON field must be LargeBinaryArray");

             let mut builder = arrow_array::builder::StringBuilder::new();
             for i in 0..binary_array.len() {
@@ -406,12 +391,7 @@
                     builder.append_null();
                 } else {
                     let jsonb_bytes = binary_array.value(i);
-                    let json_str = decode_json(jsonb_bytes).map_err(|e| {
-                        ArrowError::InvalidArgumentError(format!(
-                            "Failed to decode JSON: {}",
-                            e
-                        ))
-                    })?;
+                    let json_str = decode_json(jsonb_bytes);
                     builder.append_value(&json_str);
                 }
             }
@@ -460,19 +440,16 @@ pub fn convert_json_columns(
             new_columns.push(Arc::new(empty_binary) as ArrayRef);
         } else {
             // Convert non-empty data
+            // is_arrow_json_field guarantees type is Utf8 or LargeUtf8
             let json_array = if let Some(string_array) =
                 column.as_any().downcast_ref::<StringArray>()
             {
                 JsonArray::try_from(string_array)?
-            } else if let Some(large_string_array) =
-                column.as_any().downcast_ref::<LargeStringArray>()
-            {
-                JsonArray::try_from(large_string_array)?
             } else {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Arrow JSON field '{}' has unexpected storage type: {:?}",
-                    field.name(),
-                    column.data_type()
-                )));
+                let large_string_array = column
+                    .as_any()
+                    .downcast_ref::<LargeStringArray>()
+                    .expect("Arrow JSON field must be Utf8 or LargeUtf8");
+                JsonArray::try_from(large_string_array)?
             };
             let binary_array = json_array.into_inner();
@@ -601,8 +578,626 @@ mod tests {
             .unwrap();
         for i in 0..binary_array.len() {
             let jsonb_bytes = binary_array.value(i);
-            let decoded = decode_json(jsonb_bytes).unwrap();
+            let decoded = decode_json(jsonb_bytes);
             assert!(decoded.contains("name"));
         }
     }
+
+    #[test]
+    fn test_has_json_fields() {
+        // Test direct JSON field
+        let json_f = json_field("data", true);
+        assert!(has_json_fields(&json_f));
+
+        // Test non-JSON field
+        let non_json = ArrowField::new("data", DataType::Utf8, true);
+        assert!(!has_json_fields(&non_json));
+
+        // Test struct containing JSON field
+        let struct_field = ArrowField::new(
+            "struct",
+            DataType::Struct(vec![json_field("nested_json", true)].into()),
+            true,
+        );
+        assert!(has_json_fields(&struct_field));
+
+        // Test struct without JSON field
+        let struct_no_json = ArrowField::new(
+            "struct",
+            DataType::Struct(vec![ArrowField::new("text", DataType::Utf8, true)].into()),
+            true,
+        );
+        assert!(!has_json_fields(&struct_no_json));
+
+        // Test List containing JSON field
+        let list_field = ArrowField::new(
+            "list",
+            DataType::List(Arc::new(json_field("item", true))),
+            true,
+        );
+        assert!(has_json_fields(&list_field));
+
+        // Test LargeList containing JSON field
+        let large_list_field = ArrowField::new(
+            "large_list",
+            DataType::LargeList(Arc::new(json_field("item", true))),
+            true,
+        );
+        assert!(has_json_fields(&large_list_field));
+
+        // Test FixedSizeList containing JSON field
+        let fixed_list_field = ArrowField::new(
+            "fixed_list",
+            DataType::FixedSizeList(Arc::new(json_field("item", true)), 3),
+            true,
+        );
+        assert!(has_json_fields(&fixed_list_field));
+
+        // Test Map containing JSON field
+        let map_field = ArrowField::new(
+            "map",
+            DataType::Map(
+                Arc::new(ArrowField::new(
+                    "entries",
+                    DataType::Struct(
+                        vec![
+                            ArrowField::new("key", DataType::Utf8, false),
+                            json_field("value", true),
+                        ]
+                        .into(),
+                    ),
+                    false,
+                )),
+                false,
+            ),
+            true,
+        );
+        assert!(has_json_fields(&map_field));
+    }
+
+    #[test]
+    fn test_json_array_inner() {
+        let json_array = JsonArray::try_from_iter(vec![Some(r#"{"a": 1}"#)]).unwrap();
+        let inner = json_array.inner();
+        assert_eq!(inner.len(), 1);
+    }
+
+    #[test]
+    fn test_json_array_value_null_error() {
+        let json_array = JsonArray::try_from_iter(vec![None::<&str>]).unwrap();
+        let result = json_array.value(0);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("null"));
+    }
+
+    #[test]
+    fn test_json_array_value_bytes() {
+        let json_array = JsonArray::try_from_iter(vec![Some(r#"{"a": 1}"#)]).unwrap();
+        let bytes = json_array.value_bytes(0);
+        assert!(!bytes.is_empty());
+    }
+
+    #[test]
+    fn test_json_path_with_null() {
+        let json_array =
+            JsonArray::try_from_iter(vec![Some(r#"{"user": {"name": "Alice"}}"#), None::<&str>])
+                .unwrap();
+
+        let result = json_array.json_path(1, "$.user.name").unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_to_arrow_json() {
+        let json_array = JsonArray::try_from_iter(vec![
+            Some(r#"{"name": "Alice"}"#),
+            None::<&str>,
+            Some(r#"{"name": "Bob"}"#),
+        ])
+        .unwrap();
+
+        let arrow_json = json_array.to_arrow_json();
+        assert_eq!(arrow_json.len(), 3);
+        assert!(!arrow_json.is_null(0));
+        assert!(arrow_json.is_null(1));
+        assert!(!arrow_json.is_null(2));
+
+        let string_array = arrow_json.as_any().downcast_ref::<StringArray>().unwrap();
+        assert!(string_array.value(0).contains("Alice"));
+        assert!(string_array.value(2).contains("Bob"));
+    }
+
+    #[test]
+    fn test_json_array_trait_methods() {
+        let json_array =
+            JsonArray::try_from_iter(vec![Some(r#"{"a": 1}"#), Some(r#"{"b": 2}"#)]).unwrap();
+
+        // Test as_any
+        let any_ref = json_array.as_any();
+        assert!(any_ref.downcast_ref::<JsonArray>().is_some());
+
+        // Test data_type
+        assert_eq!(json_array.data_type(), &DataType::LargeBinary);
+
+        // Test len (already covered, but included for completeness)
+        assert_eq!(json_array.len(), 2);
+
+        // Test is_empty
+        assert!(!json_array.is_empty());
+
+        // Test offset
+        assert_eq!(json_array.offset(), 0);
+
+        // Test get_buffer_memory_size
+        assert!(json_array.get_buffer_memory_size() > 0);
+
+        // Test get_array_memory_size
+        assert!(json_array.get_array_memory_size() > 0);
+
+        // Test to_data
+        let data = json_array.clone().to_data();
+        assert_eq!(data.len(), 2);
+
+        // Test into_data
+        let data = json_array.clone().into_data();
+        assert_eq!(data.len(), 2);
+
+        // Test slice
+        let sliced = json_array.slice(0, 1);
+        assert_eq!(sliced.len(), 1);
+    }
+
+    #[test]
+    fn test_json_array_empty() {
+        let json_array = JsonArray::try_from_iter(Vec::<Option<&str>>::new()).unwrap();
+        assert!(json_array.is_empty());
+        assert_eq!(json_array.len(), 0);
+    }
+
+    #[test]
+    fn test_try_from_large_string_array() {
+        let large_string_array = LargeStringArray::from(vec![
+            Some(r#"{"name": "Alice"}"#),
+            Some(r#"{"name": "Bob"}"#),
+            None,
+        ]);
+
+        // Test TryFrom<&LargeStringArray>
+        let json_array = JsonArray::try_from(&large_string_array).unwrap();
+        assert_eq!(json_array.len(), 3);
+        assert!(!json_array.is_null(0));
+        assert!(!json_array.is_null(1));
+        assert!(json_array.is_null(2));
+
+        // Test TryFrom (owned)
+        let large_string_array2 = LargeStringArray::from(vec![Some(r#"{"x": 1}"#)]);
+        let json_array2 = JsonArray::try_from(large_string_array2).unwrap();
+        assert_eq!(json_array2.len(), 1);
+    }
+
+    #[test]
+    fn test_try_from_array_ref() {
+        // Test with Utf8
+        let string_array: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"{"a": 1}"#),
+            Some(r#"{"b": 2}"#),
+        ]));
+        let json_array = JsonArray::try_from(string_array).unwrap();
+        assert_eq!(json_array.len(), 2);
+
+        // Test with LargeUtf8
+        let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(vec![
+            Some(r#"{"c": 3}"#),
+            Some(r#"{"d": 4}"#),
+        ]));
+        let json_array2 = JsonArray::try_from(large_string_array).unwrap();
+        assert_eq!(json_array2.len(), 2);
+
+        // Test with unsupported type
+        let int_array: ArrayRef = Arc::new(arrow_array::Int32Array::from(vec![1, 2, 3]));
+        let result = JsonArray::try_from(int_array);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("Unsupported"));
+    }
+
+    #[test]
+    fn test_arrow_json_to_lance_json_non_json_field() {
+        // Test that non-JSON fields are returned unchanged
+        let field = ArrowField::new("text", DataType::Utf8, true);
+        let converted = arrow_json_to_lance_json(&field);
+        assert_eq!(converted.data_type(), &DataType::Utf8);
+        assert_eq!(converted.name(), "text");
+    }
+
+    #[test]
+    fn test_convert_lance_json_to_arrow() {
+        // Create a batch with Lance JSON column (JSONB)
+        let json_array = JsonArray::try_from_iter(vec![
+            Some(r#"{"name": "Alice"}"#),
+            None::<&str>,
+            Some(r#"{"name": "Bob"}"#),
+        ])
+        .unwrap();
+
+        let lance_json_field = json_field("data", true);
+        let schema = Arc::new(Schema::new(vec![lance_json_field]));
+        let batch =
+            RecordBatch::try_new(schema, vec![Arc::new(json_array.into_inner()) as ArrayRef])
+                .unwrap();
+
+        // Convert back to Arrow JSON
+        let converted = convert_lance_json_to_arrow(&batch).unwrap();
+
+        // Check schema
+        let converted_schema = converted.schema();
+        let converted_field = converted_schema.field(0);
+        assert_eq!(converted_field.data_type(), &DataType::Utf8);
+        assert_eq!(
+            converted_field.metadata().get(ARROW_EXT_NAME_KEY),
+            Some(&ARROW_JSON_EXT_NAME.to_string())
+        );
+
+        // Check data
+        let string_array = converted
+            .column(0)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert!(!string_array.is_null(0));
+        assert!(string_array.is_null(1));
+        assert!(!string_array.is_null(2));
+        assert!(string_array.value(0).contains("Alice"));
+        assert!(string_array.value(2).contains("Bob"));
+    }
+
+    #[test]
+    fn test_convert_lance_json_to_arrow_empty_batch() {
+        // Create an empty batch with Lance JSON column
+        let lance_json_field = json_field("data", true);
+        let schema = Arc::new(Schema::new(vec![lance_json_field]));
+        let empty_binary = LargeBinaryBuilder::new().finish();
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(empty_binary) as ArrayRef]).unwrap();
+
+        // Convert back to Arrow JSON
+        let converted = convert_lance_json_to_arrow(&batch).unwrap();
+        assert_eq!(converted.num_rows(), 0);
+        assert_eq!(converted.schema().field(0).data_type(), &DataType::Utf8);
+    }
+
+    #[test]
+    fn test_convert_lance_json_to_arrow_no_json_columns() {
+        // Create a batch without JSON columns
+        let field = ArrowField::new("text", DataType::Utf8, true);
+        let schema = Arc::new(Schema::new(vec![field]));
+        let string_array = StringArray::from(vec![Some("hello"), Some("world")]);
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array) as ArrayRef]).unwrap();
+
+        // Convert - should return the same batch
+        let converted = convert_lance_json_to_arrow(&batch).unwrap();
+        assert_eq!(converted.num_columns(), 1);
+        assert_eq!(converted.schema().field(0).data_type(), &DataType::Utf8);
+    }
+
+    #[test]
+    fn test_convert_json_columns_empty_batch() {
+        // Create an empty batch with Arrow JSON column
+        let mut field = ArrowField::new("data", DataType::Utf8, false);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        field.set_metadata(metadata);
+
+        let schema = Arc::new(Schema::new(vec![field]));
+        let empty_strings = arrow_array::builder::StringBuilder::new().finish();
+        let batch =
+            RecordBatch::try_new(schema, vec![Arc::new(empty_strings) as ArrayRef]).unwrap();
+
+        let converted = convert_json_columns(&batch).unwrap();
+        assert_eq!(converted.num_rows(), 0);
+        assert_eq!(
+            converted.schema().field(0).data_type(),
+            &DataType::LargeBinary
+        );
+    }
+
+    #[test]
+    fn test_convert_json_columns_large_string() {
+        // Create a batch with Arrow JSON column using LargeUtf8
+        let json_strings = LargeStringArray::from(vec![
+            Some(r#"{"name": "Alice"}"#),
+            Some(r#"{"name": "Bob"}"#),
+        ]);
+
+        let mut field = ArrowField::new("data", DataType::LargeUtf8, false);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        field.set_metadata(metadata);
+
+        let schema = Arc::new(Schema::new(vec![field]));
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(json_strings) as ArrayRef]).unwrap();
+
+        let converted = convert_json_columns(&batch).unwrap();
+        assert_eq!(converted.num_columns(), 1);
+        assert_eq!(
+            converted.schema().field(0).data_type(),
+            &DataType::LargeBinary
+        );
+        assert_eq!(converted.num_rows(), 2);
+    }
+
+    #[test]
+    fn test_convert_json_columns_no_json_columns() {
+        // Create a batch without JSON columns
+        let field = ArrowField::new("text", DataType::Utf8, true);
+        let schema = Arc::new(Schema::new(vec![field]));
+        let string_array = StringArray::from(vec![Some("hello"), Some("world")]);
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array) as ArrayRef]).unwrap();
+
+        // Convert - should return the same batch
+        let converted = convert_json_columns(&batch).unwrap();
+        assert_eq!(converted.num_columns(), 1);
+        assert_eq!(converted.schema().field(0).data_type(), &DataType::Utf8);
+    }
+
+    #[test]
+    fn test_convert_json_columns_mixed_columns() {
+        // Create a batch with both JSON and non-JSON columns
+        let json_strings = StringArray::from(vec![
+            Some(r#"{"name": "Alice"}"#),
+            Some(r#"{"name": "Bob"}"#),
+        ]);
+        let text_strings = StringArray::from(vec![Some("hello"), Some("world")]);
+
+        let mut json_field = ArrowField::new("json_data", DataType::Utf8, false);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        json_field.set_metadata(metadata);
+
+        let text_field = ArrowField::new("text_data", DataType::Utf8, true);
+
+        let schema = Arc::new(Schema::new(vec![json_field, text_field]));
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(json_strings) as ArrayRef,
+                Arc::new(text_strings) as ArrayRef,
+            ],
+        )
+        .unwrap();
+
+        let converted = convert_json_columns(&batch).unwrap();
+        assert_eq!(converted.num_columns(), 2);
+        assert_eq!(
+            converted.schema().field(0).data_type(),
+            &DataType::LargeBinary
+        );
+        assert_eq!(converted.schema().field(1).data_type(), &DataType::Utf8);
+    }
+
+    #[test]
+    fn test_is_arrow_json_field_large_utf8() {
+        // Test with LargeUtf8 storage type
+        let mut field = ArrowField::new("data", DataType::LargeUtf8, true);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        field.set_metadata(metadata);
+
+        assert!(is_arrow_json_field(&field));
+    }
+
+    #[test]
+    fn test_encode_json_invalid() {
+        // Test encoding invalid JSON
+        let result = encode_json("not valid json {");
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_json_array_from_invalid_json() {
+        // Test creating JsonArray from invalid JSON strings
+        let result = JsonArray::try_from_iter(vec![Some("invalid json {")]);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().to_string().contains("Failed to encode"));
+    }
+
+    #[test]
+    fn test_try_from_string_array_invalid_json() {
+        let string_array = StringArray::from(vec![Some("invalid json {")]);
+        let result = JsonArray::try_from(string_array);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_try_from_large_string_array_invalid_json() {
+        let large_string_array = LargeStringArray::from(vec![Some("invalid json {")]);
+        let result = JsonArray::try_from(large_string_array);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_convert_lance_json_to_arrow_mixed_columns() {
+        // Create a batch with both JSON and non-JSON columns
+        let json_array = JsonArray::try_from_iter(vec![
+            Some(r#"{"name": "Alice"}"#),
+            Some(r#"{"name": "Bob"}"#),
+        ])
+        .unwrap();
+        let text_strings = StringArray::from(vec![Some("hello"), Some("world")]);
+
+        let json_f = json_field("json_data", true);
+        let text_field = ArrowField::new("text_data", DataType::Utf8, true);
+
+        let schema = Arc::new(Schema::new(vec![json_f, text_field]));
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(json_array.into_inner()) as ArrayRef,
+                Arc::new(text_strings) as ArrayRef,
+            ],
+        )
+        .unwrap();
+
+        let converted = convert_lance_json_to_arrow(&batch).unwrap();
+        assert_eq!(converted.num_columns(), 2);
+        assert_eq!(converted.schema().field(0).data_type(), &DataType::Utf8);
+        assert_eq!(converted.schema().field(1).data_type(), &DataType::Utf8);
+    }
+
+    #[test]
+    fn test_json_path_invalid_path() {
+        let json_array = JsonArray::try_from_iter(vec![Some(r#"{"a": 1}"#)]).unwrap();
+        // Invalid JSONPath syntax should return error
+        let result = json_array.json_path(0, "invalid path without $");
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("Failed to extract JSONPath"));
+    }
+
+    #[test]
+    fn test_convert_json_columns_invalid_storage_type() {
+        // Create a batch with Arrow JSON field but wrong storage type (Int32 instead of Utf8)
+        let int_array = arrow_array::Int32Array::from(vec![1, 2, 3]);
+
+        let mut field = ArrowField::new("data", DataType::Int32, false);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        field.set_metadata(metadata);
+
+        let schema = Arc::new(Schema::new(vec![field]));
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(int_array) as ArrayRef]).unwrap();
+
+        // This should succeed since Int32 doesn't match is_arrow_json_field check
+        // (is_arrow_json_field requires Utf8 or LargeUtf8)
+        let result = convert_json_columns(&batch);
+        assert!(result.is_ok());
+    }
+
+    #[test]
+    fn test_is_json_field_wrong_extension() {
+        // LargeBinary field without the correct extension metadata
+        let field = ArrowField::new("data", DataType::LargeBinary, true);
+        assert!(!is_json_field(&field));
+
+        // LargeBinary field with wrong extension name
+        let mut field2 = ArrowField::new("data", DataType::LargeBinary, true);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            "other.extension".to_string(),
+        );
+        field2.set_metadata(metadata);
+        assert!(!is_json_field(&field2));
+    }
+
+    #[test]
+    fn test_is_arrow_json_field_wrong_extension() {
+        // Utf8 field without extension metadata
+        let field = ArrowField::new("data", DataType::Utf8, true);
+        assert!(!is_arrow_json_field(&field));
+
+        // Utf8 field with wrong extension name
+        let mut field2 = ArrowField::new("data", DataType::Utf8, true);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            "other.extension".to_string(),
+        );
+        field2.set_metadata(metadata);
+        assert!(!is_arrow_json_field(&field2));
+
+        // Wrong type entirely
+        let field3 = ArrowField::new("data", DataType::Int32, true);
+        assert!(!is_arrow_json_field(&field3));
+    }
+
+    #[test]
+    fn test_convert_json_columns_invalid_json_utf8() {
+        // Test error propagation when converting invalid JSON (Utf8)
+        let invalid_json = StringArray::from(vec![Some("invalid json {")]);
+
+        let mut field = ArrowField::new("data", DataType::Utf8, false);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        field.set_metadata(metadata);
+
+        let schema = Arc::new(Schema::new(vec![field]));
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(invalid_json) as ArrayRef]).unwrap();
+
+        let result = convert_json_columns(&batch);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_convert_json_columns_invalid_json_large_utf8() {
+        // Test error propagation when converting invalid JSON (LargeUtf8)
+        let invalid_json = LargeStringArray::from(vec![Some("invalid json {")]);
+
+        let mut field = ArrowField::new("data", DataType::LargeUtf8, false);
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        );
+        field.set_metadata(metadata);
+
+        let schema = Arc::new(Schema::new(vec![field]));
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(invalid_json) as ArrayRef]).unwrap();
+
+        let result = convert_json_columns(&batch);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_json_path_on_corrupted_jsonb() {
+        // Create corrupted JSONB bytes directly
+        let corrupted_bytes: &[u8] = &[0xFF, 0xFE, 0x00, 0x01, 0x02];
+        let corrupted_binary = LargeBinaryArray::from(vec![Some(corrupted_bytes)]);
+
+        // Wrap in JsonArray
+        let corrupted_json = JsonArray {
+            inner: corrupted_binary,
+        };
+
+        // Try to use json_path on corrupted data - the selector might fail or return unexpected results
+        // This exercises the code path but may not produce an error depending on jsonb library behavior
+        let _result = corrupted_json.json_path(0, "$.a");
+        // We don't assert on the result as the behavior depends on the jsonb library
+    }
+
+    #[test]
+    fn test_decode_json_on_various_inputs() {
+        // Test decode_json with various inputs
+        let valid_jsonb = encode_json(r#"{"key": "value"}"#).unwrap();
+        let decoded = decode_json(&valid_jsonb);
+        assert!(decoded.contains("key"));
+
+        // Empty bytes - jsonb library handles this gracefully
+        let decoded_empty = decode_json(&[]);
+        // Just verify it doesn't panic
+        let _ = decoded_empty;
+
+        // Random bytes - jsonb library handles this gracefully
+        let decoded_random = decode_json(&[0xFF, 0xFE, 0x00]);
+        // Just verify it doesn't panic
+        let _ = decoded_random;
+    }
 }