ArrowError(InvalidArgumentError("RowConverter column schema mismatch, expected Utf8 got Int64"), None)
use std::sync::Arc;
use datafusion::arrow::array::{Int32Array, StringArray, ArrayRef, ListArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::buffer::OffsetBuffer;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;
#[tokio::main]
async fn main() -> Result<()> {
// Create a simple datafusion table
let ctx = SessionContext::new();
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("taxonomy", DataType::List(Arc::new(Field::new(
"item",
DataType::Utf8,
true,
))), false),
]));
let id_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
let values = Arc::new(StringArray::from(vec![
"category_1", "category_2", // For id=1
"category_3", // For id=2
"category_4" // For id=3
]));
let offsets = OffsetBuffer::new(vec![0, 2, 3, 4].into());
let field = Arc::new(Field::new("item", DataType::Utf8, true));
let taxonomy_array = Arc::new(ListArray::new(field, offsets, values, None));
let batch = RecordBatch::try_new(
schema.clone(),
vec![id_array, taxonomy_array],
)?;
let partitions = vec![vec![batch]];
let table = MemTable::try_new(schema, partitions)?;
ctx.register_table("test_table", Arc::new(table))?;
println!("Original data:");
let df = ctx.table("test_table").await?;
df.show().await?;
// Execute the problematic query with empty array
println!("\nExecuting filter with empty array:");
let query = "SELECT * FROM test_table WHERE array_has_any(taxonomy, [])";
// print
// Error: ArrowError(InvalidArgumentError("RowConverter column schema mismatch, expected Utf8 got Int64"), None)
ctx.sql(query).await?.show().await?;
Ok(())
}
The error message is misleading and does not clearly indicate that an empty array might be an invalid or unsupported input. The issue could be improved by either:
Describe the bug
Executing `array_has_any(column_name, [])` results in an error (`RowConverter column schema mismatch, expected Utf8 got Int64`).
To Reproduce
Run the Rust program above with DataFusion version 44.0.
Expected behavior
The error message is misleading and does not clearly indicate that an empty array might be an invalid or unsupported input. The issue could be improved by either:
Additional context
No response