-
Notifications
You must be signed in to change notification settings - Fork 2k
Closed
Labels
enhancement: New feature or request
Description
As in #199
Describe the bug
Distinct aggregates such as `SELECT COUNT(DISTINCT col) ...` fail with an error when `col` is a dictionary-encoded column.
To Reproduce
// Reproduction: COUNT(DISTINCT ..) over a dictionary-encoded column, grouped by
// an Int64 column.
// NOTE(review): `K` is not defined in this snippet; presumably it is the
// dictionary key type parameter of the enclosing test. The failure reported
// below was observed with a `Dictionary(Int8, Utf8)` column.
let mut ctx = ExecutionContext::new();

// input data looks like:
// A, 1
// B, 2
// A, 2
// A, 4
// C, 1
// A, 1
let dict_array: DictionaryArray<K> =
    vec!["A", "B", "A", "A", "C", "A"].into_iter().collect();
let dict_array = Arc::new(dict_array);

let val_array: Int64Array = vec![1, 2, 2, 4, 1, 1].into();
let val_array = Arc::new(val_array);

let schema = Arc::new(Schema::new(vec![
    Field::new("dict", dict_array.data_type().clone(), false),
    Field::new("val", val_array.data_type().clone(), false),
]));

let batch = RecordBatch::try_new(schema.clone(), vec![dict_array, val_array])
    .unwrap();

let provider = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap();
ctx.register_table("t", Arc::new(provider)).unwrap();

// Now, use dict as the argument of a distinct aggregate
let results =
    plan_and_collect(&mut ctx, "SELECT val, count(distinct dict) FROM t GROUP BY val")
        .await
        .expect("ran plan correctly");

// Distinct dict values per val: 1 -> {A, C}, 2 -> {B, A}, 4 -> {A}.
// Every data row is padded to the same width as the border and header lines,
// since the expected table is compared against the formatted output as text.
let expected = vec![
    "+-----+-------------+",
    "| val | COUNT(dict) |",
    "+-----+-------------+",
    "| 1   | 2           |",
    "| 2   | 2           |",
    "| 4   | 1           |",
    "+-----+-------------+",
];
assert_batches_sorted_eq!(expected, &results);
The test fails with an error such as:
thread 'execution::context::tests::group_by_dictionary' panicked at 'ran plan correctly: ArrowError(ExternalError(NotImplemented("Can\'t create a scalar of array of type \"Dictionary(Int8, Utf8)\"")))', datafusion/src/execution/context.rs:1774:22
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Expected behavior
The test passes: the query runs without error and returns the expected distinct counts for each `val` group.
Additional context
Add any other context about the problem here.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
enhancement: New feature or request