From 6adb8f7af8359675d38260f24e139d92ea22cdef Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Sun, 18 Jan 2026 16:54:32 +0100 Subject: [PATCH 01/70] Add a default FileStatisticsCache implementation for the ListingTable --- datafusion-cli/src/main.rs | 49 -- datafusion/catalog-listing/src/table.rs | 30 +- datafusion/common/src/heap_size.rs | 458 ++++++++++++++++++ datafusion/common/src/lib.rs | 2 + .../src/datasource/listing_table_factory.rs | 4 +- datafusion/core/src/execution/context/mod.rs | 11 +- .../core/tests/parquet/file_statistics.rs | 2 +- datafusion/core/tests/sql/runtime_config.rs | 46 ++ .../execution/src/cache/cache_manager.rs | 68 ++- datafusion/execution/src/cache/cache_unit.rs | 249 +++++++++- datafusion/execution/src/runtime_env.rs | 26 +- .../test_files/encrypted_parquet.slt | 4 + .../test_files/information_schema.slt | 2 + .../test_files/parquet_sorted_statistics.slt | 4 + .../sqllogictest/test_files/set_variable.slt | 16 + 15 files changed, 868 insertions(+), 103 deletions(-) create mode 100644 datafusion/common/src/heap_size.rs diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 6bfe1160ecdd6..1909ed392afd3 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -689,55 +689,6 @@ mod tests { // When the cache manager creates a StatisticsCache by default, // the contents will show up here - let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; - let df = ctx.sql(sql).await?; - let rbs = df.collect().await?; - assert_snapshot!(batches_to_string(&rbs),@r" - ++ - ++ - "); - - Ok(()) - } - - // Can be removed when https://github.com/apache/datafusion/issues/19217 is resolved - #[tokio::test] - async fn test_statistics_cache_override() -> Result<(), DataFusionError> { - // Install a specific StatisticsCache implementation - let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); - 
let cache_config = CacheManagerConfig::default() - .with_files_statistics_cache(Some(file_statistics_cache.clone())); - let runtime = RuntimeEnvBuilder::new() - .with_cache_manager(cache_config) - .build()?; - let config = SessionConfig::new().with_collect_statistics(true); - let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime)); - - ctx.register_udtf( - "statistics_cache", - Arc::new(StatisticsCacheFunc::new( - ctx.task_ctx().runtime_env().cache_manager.clone(), - )), - ); - - for filename in [ - "alltypes_plain", - "alltypes_tiny_pages", - "lz4_raw_compressed_larger", - ] { - ctx.sql( - format!( - "create external table {filename} - stored as parquet - location '../parquet-testing/data/{filename}.parquet'", - ) - .as_str(), - ) - .await? - .collect() - .await?; - } - let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 06ba8c8113fac..0ed2b452bc626 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -36,7 +36,6 @@ use datafusion_datasource::{ }; use datafusion_execution::cache::TableScopedPath; use datafusion_execution::cache::cache_manager::FileStatisticsCache; -use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; use datafusion_expr::dml::InsertOp; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; @@ -187,7 +186,7 @@ pub struct ListingTable { /// The SQL definition for this table, if any definition: Option, /// Cache for collected file statistics - collected_statistics: Arc, + collected_statistics: Option>, /// Constraints applied to this table constraints: Constraints, /// Column default expressions for columns that are not physically 
present in the data files @@ -231,7 +230,7 @@ impl ListingTable { schema_source, options, definition: None, - collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), + collected_statistics: None, constraints: Constraints::default(), column_defaults: HashMap::new(), expr_adapter_factory: config.expr_adapter_factory, @@ -260,10 +259,8 @@ impl ListingTable { /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics /// multiple times in the same session. /// - /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. pub fn with_cache(mut self, cache: Option>) -> Self { - self.collected_statistics = - cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); + self.collected_statistics = cache; self } @@ -806,7 +803,8 @@ impl ListingTable { let meta = &part_file.object_meta; // Check cache first - if we have valid cached statistics and ordering - if let Some(cached) = self.collected_statistics.get(path) + if let Some(cache) = &self.collected_statistics + && let Some(cached) = cache.get(path) && cached.is_valid_for(meta) { // Return cached statistics and ordering @@ -823,14 +821,16 @@ impl ListingTable { let statistics = Arc::new(file_meta.statistics); // Store in cache - self.collected_statistics.put( - path, - CachedFileMetadata::new( - meta.clone(), - Arc::clone(&statistics), - file_meta.ordering.clone(), - ), - ); + if let Some(cache) = &self.collected_statistics { + cache.put( + path, + CachedFileMetadata::new( + meta.clone(), + Arc::clone(&statistics), + file_meta.ordering.clone(), + ), + ); + } Ok((statistics, file_meta.ordering)) } diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs new file mode 100644 index 0000000000000..6dee7d5c0a373 --- /dev/null +++ b/datafusion/common/src/heap_size.rs @@ -0,0 +1,458 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::stats::Precision; +use crate::{ColumnStatistics, ScalarValue, Statistics}; +use arrow::array::{ + Array, FixedSizeListArray, LargeListArray, ListArray, MapArray, StructArray, +}; +use arrow::datatypes::{ + DataType, Field, Fields, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, + TimeUnit, UnionFields, UnionMode, i256, +}; +use chrono::{DateTime, Utc}; +use half::f16; +use std::collections::HashMap; +use std::fmt::Debug; +use std::sync::Arc; + +/// This is a temporary solution until and +/// are resolved. +/// Trait for calculating the size of various containers +pub trait DFHeapSize { + /// Return the size of any bytes allocated on the heap by this object, + /// including heap memory in those structures + /// + /// Note that the size of the type itself is not included in the result -- + /// instead, that size is added by the caller (e.g. container). 
+ fn heap_size(&self) -> usize; +} + +impl DFHeapSize for Statistics { + fn heap_size(&self) -> usize { + self.num_rows.heap_size() + + self.total_byte_size.heap_size() + + self + .column_statistics + .iter() + .map(|s| s.heap_size()) + .sum::() + } +} + +impl DFHeapSize + for Precision +{ + fn heap_size(&self) -> usize { + self.get_value().map_or_else(|| 0, |v| v.heap_size()) + } +} + +impl DFHeapSize for ColumnStatistics { + fn heap_size(&self) -> usize { + self.null_count.heap_size() + + self.max_value.heap_size() + + self.min_value.heap_size() + + self.sum_value.heap_size() + + self.distinct_count.heap_size() + + self.byte_size.heap_size() + } +} + +impl DFHeapSize for ScalarValue { + fn heap_size(&self) -> usize { + use crate::scalar::ScalarValue::*; + match self { + Null => 0, + Boolean(b) => b.heap_size(), + Float16(f) => f.heap_size(), + Float32(f) => f.heap_size(), + Float64(f) => f.heap_size(), + Decimal32(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Decimal64(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Decimal128(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Decimal256(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Int8(i) => i.heap_size(), + Int16(i) => i.heap_size(), + Int32(i) => i.heap_size(), + Int64(i) => i.heap_size(), + UInt8(u) => u.heap_size(), + UInt16(u) => u.heap_size(), + UInt32(u) => u.heap_size(), + UInt64(u) => u.heap_size(), + Utf8(u) => u.heap_size(), + Utf8View(u) => u.heap_size(), + LargeUtf8(l) => l.heap_size(), + Binary(b) => b.heap_size(), + BinaryView(b) => b.heap_size(), + FixedSizeBinary(a, b) => a.heap_size() + b.heap_size(), + LargeBinary(l) => l.heap_size(), + FixedSizeList(f) => f.heap_size(), + List(l) => l.heap_size(), + LargeList(l) => l.heap_size(), + Struct(s) => s.heap_size(), + Map(m) => m.heap_size(), + Date32(d) => d.heap_size(), + Date64(d) => d.heap_size(), + Time32Second(t) => t.heap_size(), + Time32Millisecond(t) => t.heap_size(), + 
Time64Microsecond(t) => t.heap_size(), + Time64Nanosecond(t) => t.heap_size(), + TimestampSecond(a, b) => a.heap_size() + b.heap_size(), + TimestampMillisecond(a, b) => a.heap_size() + b.heap_size(), + TimestampMicrosecond(a, b) => a.heap_size() + b.heap_size(), + TimestampNanosecond(a, b) => a.heap_size() + b.heap_size(), + IntervalYearMonth(i) => i.heap_size(), + IntervalDayTime(i) => i.heap_size(), + IntervalMonthDayNano(i) => i.heap_size(), + DurationSecond(d) => d.heap_size(), + DurationMillisecond(d) => d.heap_size(), + DurationMicrosecond(d) => d.heap_size(), + DurationNanosecond(d) => d.heap_size(), + Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Dictionary(a, b) => a.heap_size() + b.heap_size(), + } + } +} + +impl DFHeapSize for DataType { + fn heap_size(&self) -> usize { + use DataType::*; + match self { + Null => 0, + Boolean => 0, + Int8 => 0, + Int16 => 0, + Int32 => 0, + Int64 => 0, + UInt8 => 0, + UInt16 => 0, + UInt32 => 0, + UInt64 => 0, + Float16 => 0, + Float32 => 0, + Float64 => 0, + Timestamp(t, s) => t.heap_size() + s.heap_size(), + Date32 => 0, + Date64 => 0, + Time32(t) => t.heap_size(), + Time64(t) => t.heap_size(), + Duration(t) => t.heap_size(), + Interval(i) => i.heap_size(), + Binary => 0, + FixedSizeBinary(i) => i.heap_size(), + LargeBinary => 0, + BinaryView => 0, + Utf8 => 0, + LargeUtf8 => 0, + Utf8View => 0, + List(v) => v.heap_size(), + ListView(v) => v.heap_size(), + FixedSizeList(f, i) => f.heap_size() + i.heap_size(), + LargeList(l) => l.heap_size(), + LargeListView(l) => l.heap_size(), + Struct(s) => s.heap_size(), + Union(u, m) => u.heap_size() + m.heap_size(), + Dictionary(a, b) => a.heap_size() + b.heap_size(), + Decimal32(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal64(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal128(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal256(u8, i8) => u8.heap_size() + i8.heap_size(), + Map(m, b) => m.heap_size() + b.heap_size(), + RunEndEncoded(a, b) => 
a.heap_size() + b.heap_size(), + } + } +} + +impl DFHeapSize for Vec { + fn heap_size(&self) -> usize { + let item_size = size_of::(); + // account for the contents of the Vec + (self.capacity() * item_size) + + // add any heap allocations by contents + self.iter().map(|t| t.heap_size()).sum::() + } +} + +impl DFHeapSize for HashMap { + fn heap_size(&self) -> usize { + let capacity = self.capacity(); + if capacity == 0 { + return 0; + } + + // HashMap doesn't provide a way to get its heap size, so this is an approximation based on + // the behavior of hashbrown::HashMap as at version 0.16.0, and may become inaccurate + // if the implementation changes. + let key_val_size = size_of::<(K, V)>(); + // Overhead for the control tags group, which may be smaller depending on architecture + let group_size = 16; + // 1 byte of metadata stored per bucket. + let metadata_size = 1; + + // Compute the number of buckets for the capacity. Based on hashbrown's capacity_to_buckets + let buckets = if capacity < 15 { + let min_cap = match key_val_size { + 0..=1 => 14, + 2..=3 => 7, + _ => 3, + }; + let cap = min_cap.max(capacity); + if cap < 4 { + 4 + } else if cap < 8 { + 8 + } else { + 16 + } + } else { + (capacity.saturating_mul(8) / 7).next_power_of_two() + }; + + group_size + + (buckets * (key_val_size + metadata_size)) + + self.keys().map(|k| k.heap_size()).sum::() + + self.values().map(|v| v.heap_size()).sum::() + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self) -> usize { + // Arc stores weak and strong counts on the heap alongside an instance of T + 2 * size_of::() + size_of::() + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self) -> usize { + 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Fields { + fn heap_size(&self) -> usize { + self.into_iter().map(|f| f.heap_size()).sum::() + } +} + +impl DFHeapSize for StructArray { + fn heap_size(&self) -> usize { + 
self.get_array_memory_size() + } +} + +impl DFHeapSize for LargeListArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + +impl DFHeapSize for ListArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + +impl DFHeapSize for FixedSizeListArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} +impl DFHeapSize for MapArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self) -> usize { + 2 * size_of::() + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Box { + fn heap_size(&self) -> usize { + size_of::() + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Option { + fn heap_size(&self) -> usize { + self.as_ref().map(|inner| inner.heap_size()).unwrap_or(0) + } +} + +impl DFHeapSize for (A, B) +where + A: DFHeapSize, + B: DFHeapSize, +{ + fn heap_size(&self) -> usize { + self.0.heap_size() + self.1.heap_size() + } +} + +impl DFHeapSize for String { + fn heap_size(&self) -> usize { + self.capacity() + } +} + +impl DFHeapSize for str { + fn heap_size(&self) -> usize { + self.to_string().capacity() + } +} + +impl DFHeapSize for UnionFields { + fn heap_size(&self) -> usize { + self.iter().map(|f| f.0.heap_size() + f.1.heap_size()).sum() + } +} + +impl DFHeapSize for UnionMode { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for TimeUnit { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for IntervalUnit { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for Field { + fn heap_size(&self) -> usize { + self.name().heap_size() + + self.data_type().heap_size() + + self.is_nullable().heap_size() + + self.dict_is_ordered().heap_size() + + self.metadata().heap_size() + } +} + +impl DFHeapSize for IntervalMonthDayNano { + fn heap_size(&self) -> usize { + self.days.heap_size() + self.months.heap_size() + 
self.nanoseconds.heap_size() + } +} + +impl DFHeapSize for IntervalDayTime { + fn heap_size(&self) -> usize { + self.days.heap_size() + self.milliseconds.heap_size() + } +} + +impl DFHeapSize for DateTime { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for bool { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl DFHeapSize for u8 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for u16 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for u32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for u64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i8 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i16 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl DFHeapSize for i64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i128 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i256 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for f16 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for f32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl DFHeapSize for f64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for usize { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 996c563f0d8a2..40af6b1b3882c 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -46,6 +46,7 @@ pub mod error; pub mod file_options; pub mod format; pub mod hash_utils; +pub mod 
heap_size; pub mod instant; pub mod metadata; pub mod nested_struct; @@ -61,6 +62,7 @@ pub mod test_util; pub mod tree_node; pub mod types; pub mod utils; + /// Reexport arrow crate pub use arrow; pub use column::Column; diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 31d6e3665757e..80ca15386308e 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -476,7 +476,7 @@ mod tests { // Test with collect_statistics enabled let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); let cache_config = CacheManagerConfig::default() - .with_files_statistics_cache(Some(file_statistics_cache.clone())); + .with_file_statistics_cache(Some(file_statistics_cache.clone())); let runtime = RuntimeEnvBuilder::new() .with_cache_manager(cache_config) .build_arc() @@ -506,7 +506,7 @@ mod tests { // Test with collect_statistics disabled let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); let cache_config = CacheManagerConfig::default() - .with_files_statistics_cache(Some(file_statistics_cache.clone())); + .with_file_statistics_cache(Some(file_statistics_cache.clone())); let runtime = RuntimeEnvBuilder::new() .with_cache_manager(cache_config) .build_arc() diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 95403dddac7be..5db36f1cd923b 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -101,6 +101,7 @@ use datafusion_session::SessionStore; use async_trait::async_trait; use chrono::{DateTime, Utc}; +use datafusion_execution::cache::cache_unit::DEFAULT_FILE_STATISTICS_MEMORY_LIMIT; use object_store::ObjectStore; use parking_lot::RwLock; use url::Url; @@ -1182,6 +1183,10 @@ impl SessionContext { let duration = Self::parse_duration(variable, value)?; 
builder.with_object_list_cache_ttl(Some(duration)) } + "file_statistics_cache_limit" => { + let limit = Self::parse_memory_limit(value)?; + builder.with_file_statistics_cache_limit(limit) + } _ => return plan_err!("Unknown runtime configuration: {variable}"), // Remember to update `reset_runtime_variable()` when adding new options }; @@ -1221,9 +1226,13 @@ impl SessionContext { builder = builder.with_object_list_cache_ttl(DEFAULT_LIST_FILES_CACHE_TTL); } + "file_statistics_cache_limit" => { + builder = builder.with_file_statistics_cache_limit( + DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, + ); + } _ => return plan_err!("Unknown runtime configuration: {variable}"), }; - *state = SessionStateBuilder::from(state.clone()) .with_runtime_env(Arc::new(builder.build()?)) .build(); diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 84396be8a6a67..da89b89cee116 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -267,7 +267,7 @@ fn get_cache_runtime_state() -> ( let list_file_cache = Arc::new(DefaultListFilesCache::default()); let cache_config = cache_config - .with_files_statistics_cache(Some(file_static_cache.clone())) + .with_file_statistics_cache(Some(file_static_cache.clone())) .with_list_files_cache(Some(list_file_cache.clone())); let rt = RuntimeEnvBuilder::new() diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index cf5237d725805..6b7f0568309ce 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -25,6 +25,7 @@ use datafusion::execution::context::TaskContext; use datafusion::prelude::SessionConfig; use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; +use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; use 
datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_physical_plan::common::collect; @@ -344,6 +345,51 @@ async fn test_list_files_cache_ttl() { assert_eq!(get_limit(&ctx), Duration::from_secs(90)); } +#[tokio::test] +async fn test_file_statistics_cache_limit() { + let list_files_cache = Arc::new(DefaultFileStatisticsCache::default()); + + let rt = RuntimeEnvBuilder::new() + .with_cache_manager( + CacheManagerConfig::default() + .with_file_statistics_cache(Some(list_files_cache)), + ) + .build_arc() + .unwrap(); + + let ctx = SessionContext::new_with_config_rt(SessionConfig::default(), rt); + + let update_limit = async |ctx: &SessionContext, limit: &str| { + ctx.sql( + format!("SET datafusion.runtime.file_statistics_cache_limit = '{limit}'") + .as_str(), + ) + .await + .unwrap() + .collect() + .await + .unwrap(); + }; + + let get_limit = |ctx: &SessionContext| -> usize { + ctx.task_ctx() + .runtime_env() + .cache_manager + .get_file_statistic_cache() + .unwrap() + .cache_limit() + }; + + update_limit(&ctx, "1M").await; + assert_eq!(get_limit(&ctx), 1024 * 1024); + + update_limit(&ctx, "42G").await; + assert_eq!(get_limit(&ctx), 42 * 1024 * 1024 * 1024); + + update_limit(&ctx, "23K").await; + assert_eq!(get_limit(&ctx), 23 * 1024); +} + #[tokio::test] async fn test_unknown_runtime_config() { let ctx = SessionContext::new(); diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 0868c968c3a2f..7de993210b7b7 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -17,10 +17,14 @@ use crate::cache::CacheAccessor; use crate::cache::DefaultListFilesCache; -use crate::cache::cache_unit::DefaultFilesMetadataCache; +use crate::cache::cache_unit::{ + DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, DefaultFileStatisticsCache, + DefaultFilesMetadataCache, +}; use crate::cache::list_files_cache::ListFilesEntry; use 
crate::cache::list_files_cache::TableScopedPath; use datafusion_common::TableReference; +use datafusion_common::heap_size::DFHeapSize; use datafusion_common::stats::Precision; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -41,7 +45,7 @@ pub use super::list_files_cache::{ /// /// This struct embeds the [`ObjectMeta`] used for cache validation, /// along with the cached statistics and ordering information. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CachedFileMetadata { /// File metadata used for cache validation (size, last_modified). pub meta: ObjectMeta, @@ -81,7 +85,7 @@ impl CachedFileMetadata { /// - Statistics for the file /// - Ordering information for the file /// -/// If enabled via [`CacheManagerConfig::with_files_statistics_cache`] this +/// If enabled via [`CacheManagerConfig::with_file_statistics_cache`] this /// cache avoids inferring the same file statistics repeatedly during the /// session lifetime. /// @@ -92,10 +96,26 @@ impl CachedFileMetadata { /// /// See [`crate::runtime_env::RuntimeEnv`] for more details pub trait FileStatisticsCache: CacheAccessor { + fn cache_limit(&self) -> usize; + + /// Updates the cache with a new memory limit in bytes. + fn update_cache_limit(&self, limit: usize); + /// Retrieves the information about the entries currently cached. fn list_entries(&self) -> HashMap; } +impl DFHeapSize for CachedFileMetadata { + fn heap_size(&self) -> usize { + self.meta.size.heap_size() + + self.meta.last_modified.heap_size() + + self.meta.version.heap_size() + + self.meta.e_tag.heap_size() + + self.meta.location.as_ref().heap_size() + + self.statistics.heap_size() + } +} + /// Represents information about a cached statistics entry. /// This is used to expose the statistics cache contents to outside modules. 
#[derive(Debug, Clone, PartialEq, Eq)] @@ -330,8 +350,19 @@ pub struct CacheManager { impl CacheManager { pub fn try_new(config: &CacheManagerConfig) -> Result> { - let file_statistic_cache = - config.table_files_statistics_cache.as_ref().map(Arc::clone); + let file_statistic_cache = match &config.file_statistics_cache { + Some(fsc) if config.file_statistics_cache_limit > 0 => { + fsc.update_cache_limit(config.file_statistics_cache_limit); + Some(Arc::clone(fsc)) + } + None if config.file_statistics_cache_limit > 0 => { + let fsc: Arc = Arc::new( + DefaultFileStatisticsCache::new(config.file_statistics_cache_limit), + ); + Some(fsc) + } + _ => None, + }; let list_files_cache = match &config.list_files_cache { Some(lfc) if config.list_files_cache_limit > 0 => { @@ -371,11 +402,18 @@ impl CacheManager { })) } - /// Get the cache of listing files statistics. + /// Get the file statistics cache. pub fn get_file_statistic_cache(&self) -> Option> { self.file_statistic_cache.clone() } + /// Get the memory limit of the file statistics cache. + pub fn get_file_statistic_cache_limit(&self) -> usize { + self.file_statistic_cache + .as_ref() + .map_or(0, |c| c.cache_limit()) + } + /// Get the cache for storing the result of listing [`ObjectMeta`]s under the same path. pub fn get_list_files_cache(&self) -> Option> { self.list_files_cache.clone() @@ -411,7 +449,9 @@ pub struct CacheManagerConfig { /// Enable caching of file statistics when listing files. /// Enabling the cache avoids repeatedly reading file statistics in a DataFusion session. /// Default is disabled. Currently only Parquet files are supported. - pub table_files_statistics_cache: Option>, + pub file_statistics_cache: Option>, + /// Limit of the file statistics cache, in bytes. Default: 1MiB. + pub file_statistics_cache_limit: usize, /// Enable caching of file metadata when listing files. 
/// Enabling the cache avoids repeat list and object metadata fetch operations, which may be /// expensive in certain situations (e.g. remote object storage), for objects under paths that @@ -437,7 +477,8 @@ pub struct CacheManagerConfig { impl Default for CacheManagerConfig { fn default() -> Self { Self { - table_files_statistics_cache: Default::default(), + file_statistics_cache: Default::default(), + file_statistics_cache_limit: DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, list_files_cache: Default::default(), list_files_cache_limit: DEFAULT_LIST_FILES_CACHE_MEMORY_LIMIT, list_files_cache_ttl: DEFAULT_LIST_FILES_CACHE_TTL, @@ -448,14 +489,19 @@ impl Default for CacheManagerConfig { } impl CacheManagerConfig { - /// Set the cache for files statistics. + /// Set the cache for file statistics. /// /// Default is `None` (disabled). - pub fn with_files_statistics_cache( + pub fn with_file_statistics_cache( mut self, cache: Option>, ) -> Self { - self.table_files_statistics_cache = cache; + self.file_statistics_cache = cache; + self + } + + pub fn with_file_statistics_cache_limit(mut self, limit: usize) -> Self { + self.file_statistics_cache_limit = limit; self } diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 49e16ca4b6cbf..4e2b1eeac988e 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -15,17 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-use std::collections::HashMap; - use crate::cache::CacheAccessor; use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; - -use dashmap::DashMap; -use object_store::path::Path; +use std::collections::HashMap; +use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; +use crate::cache::lru_queue::LruQueue; +use datafusion_common::heap_size::DFHeapSize; +use object_store::path::Path; /// Default implementation of [`FileStatisticsCache`] /// @@ -41,32 +41,137 @@ pub use crate::cache::DefaultFilesMetadataCache; /// [`FileStatisticsCache`]: crate::cache::cache_manager::FileStatisticsCache #[derive(Default)] pub struct DefaultFileStatisticsCache { - cache: DashMap, + state: Mutex, +} + +impl DefaultFileStatisticsCache { + pub fn new(memory_limit: usize) -> Self { + Self { + state: Mutex::new(DefaultFileStatisticsCacheState::new(memory_limit)), + } + } + + /// Returns the size of the cached memory, in bytes. + pub fn memory_used(&self) -> usize { + let state = self.state.lock().unwrap(); + state.memory_used + } +} + +pub struct DefaultFileStatisticsCacheState { + lru_queue: LruQueue, + memory_limit: usize, + memory_used: usize, +} + +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 1024 * 1024; // 1MiB + +impl Default for DefaultFileStatisticsCacheState { + fn default() -> Self { + Self { + lru_queue: LruQueue::new(), + memory_limit: DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, + memory_used: 0, + } + } } +impl DefaultFileStatisticsCacheState { + fn new(memory_limit: usize) -> Self { + Self { + lru_queue: LruQueue::new(), + memory_limit, + memory_used: 0, + } + } + fn get(&mut self, key: &Path) -> Option { + self.lru_queue.get(key).cloned() + } + + fn put( + &mut self, + key: &Path, + value: CachedFileMetadata, + ) -> Option { + let entry_size = value.heap_size(); + + if entry_size > self.memory_limit { + return None; + } + + let old_value = self.lru_queue.put(key.clone(), value); + self.memory_used += 
entry_size; + + if let Some(old_entry) = &old_value { + self.memory_used -= old_entry.heap_size(); + } + + self.evict_entries(); + + old_value + } + + fn remove(&mut self, k: &Path) -> Option { + self.lru_queue.remove(k) + } + + fn contains_key(&self, k: &Path) -> bool { + self.lru_queue.contains_key(k) + } + + fn len(&self) -> usize { + self.lru_queue.len() + } + + fn clear(&mut self) { + self.lru_queue.clear(); + self.memory_used = 0; + } + + fn evict_entries(&mut self) { + while self.memory_used > self.memory_limit { + if let Some(removed) = self.lru_queue.pop() { + self.memory_used -= removed.1.heap_size(); + } else { + // cache is empty while memory_used > memory_limit, cannot happen + debug_assert!( + false, + "cache is empty while memory_used > memory_limit, cannot happen" + ); + return; + } + } + } +} impl CacheAccessor for DefaultFileStatisticsCache { fn get(&self, key: &Path) -> Option { - self.cache.get(key).map(|entry| entry.value().clone()) + let mut state = self.state.lock().unwrap(); + state.get(key) } fn put(&self, key: &Path, value: CachedFileMetadata) -> Option { - self.cache.insert(key.clone(), value) + let mut state = self.state.lock().unwrap(); + state.put(key, value) } - fn remove(&self, k: &Path) -> Option { - self.cache.remove(k).map(|(_, entry)| entry) + fn remove(&self, key: &Path) -> Option { + let mut state = self.state.lock().unwrap(); + state.remove(key) } fn contains_key(&self, k: &Path) -> bool { - self.cache.contains_key(k) + let state = self.state.lock().unwrap(); + state.contains_key(k) } fn len(&self) -> usize { - self.cache.len() + let state = self.state.lock().unwrap(); + state.len() } fn clear(&self) { - self.cache.clear(); + let mut state = self.state.lock().unwrap(); + state.clear(); } fn name(&self) -> String { @@ -75,12 +180,22 @@ impl CacheAccessor for DefaultFileStatisticsCache { } impl FileStatisticsCache for DefaultFileStatisticsCache { + fn cache_limit(&self) -> usize { + let state = self.state.lock().unwrap(); + 
state.memory_limit + } + + fn update_cache_limit(&self, limit: usize) { + let mut state = self.state.lock().unwrap(); + state.memory_limit = limit; + state.evict_entries(); + } + fn list_entries(&self) -> HashMap { let mut entries = HashMap::::new(); - - for entry in self.cache.iter() { - let path = entry.key(); - let cached = entry.value(); + for entry in self.state.lock().unwrap().lru_queue.list_entries() { + let path = entry.0.clone(); + let cached = entry.1.clone(); entries.insert( path.clone(), FileStatisticsCacheEntry { @@ -88,7 +203,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { num_rows: cached.statistics.num_rows, num_columns: cached.statistics.column_statistics.len(), table_size_bytes: cached.statistics.total_byte_size, - statistics_size_bytes: 0, // TODO: set to the real size in the future + statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), }, ); @@ -104,11 +219,12 @@ mod tests { use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; - use arrow::array::RecordBatch; + use arrow::array::{Int32Array, ListArray, RecordBatch}; + use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use chrono::DateTime; - use datafusion_common::Statistics; use datafusion_common::stats::Precision; + use datafusion_common::{ColumnStatistics, ScalarValue, Statistics}; use datafusion_expr::ColumnarValue; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; @@ -389,7 +505,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 0, + statistics_size_bytes: 72, has_ordering: false, } ), @@ -400,11 +516,100 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 0, + statistics_size_bytes: 72, 
has_ordering: true, } ), ]) ); } + + #[test] + fn test_cache_entry_added_when_entries_are_within_cache_limit() { + let (meta_1, value_1) = create_cached_file_metadata_with_stats("test1.parquet"); + let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); + let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); + + let limit_for_2_entries = value_1.heap_size() + value_2.heap_size(); + + // create a cache with a limit which fits exactly 2 entries + let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); + + cache.put(&meta_1.location, value_1.clone()); + cache.put(&meta_2.location, value_2.clone()); + + assert_eq!(cache.len(), 2); + assert_eq!(cache.memory_used(), limit_for_2_entries); + + let result_1 = cache.get(&meta_1.location); + let result_2 = cache.get(&meta_2.location); + assert_eq!(result_1.unwrap(), value_1); + assert_eq!(result_2.unwrap(), value_2); + + // adding the third entry evicts the first entry + cache.put(&meta_3.location, value_3.clone()); + assert_eq!(cache.len(), 2); + assert_eq!(cache.memory_used(), limit_for_2_entries); + + let result_1 = cache.get(&meta_1.location); + assert!(result_1.is_none()); + + let result_2 = cache.get(&meta_2.location); + let result_3 = cache.get(&meta_3.location); + + assert_eq!(result_2.unwrap(), value_2); + assert_eq!(result_3.unwrap(), value_3); + + cache.remove(&meta_2.location); + + assert_eq!(cache.len(), 1); + + cache.clear(); + assert_eq!(cache.len(), 0); + } + + #[test] + fn test_cache_rejects_entry_which_is_too_large() { + let (meta, value) = create_cached_file_metadata_with_stats("test1.parquet"); + + let limit_less_than_the_entry = value.heap_size() - 1; + + // create a cache with a size less than the entry + let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); + + cache.put(&meta.location, value); + + assert_eq!(cache.len(), 0); + assert_eq!(cache.memory_used(), 0); + } + + fn create_cached_file_metadata_with_stats( + 
file_name: &str, + ) -> (ObjectMeta, CachedFileMetadata) { + let series: Vec = (0..=10).step_by(1).collect(); + let values = Int32Array::from(series); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0])); + let field = Arc::new(Field::new_list_field(DataType::Int32, false)); + let list_array = ListArray::new(field, offsets, Arc::new(values), None); + + let column_statistics = ColumnStatistics { + null_count: Precision::Exact(1), + max_value: Precision::Exact(ScalarValue::List(Arc::new(list_array.clone()))), + min_value: Precision::Exact(ScalarValue::List(Arc::new(list_array.clone()))), + sum_value: Precision::Exact(ScalarValue::List(Arc::new(list_array.clone()))), + distinct_count: Precision::Exact(10), + byte_size: Precision::Absent, + }; + + let stats = Statistics { + num_rows: Precision::Exact(100), + total_byte_size: Precision::Exact(100), + column_statistics: vec![column_statistics.clone()], + }; + + let object_meta = create_test_meta(file_name, stats.heap_size() as u64); + let value = + CachedFileMetadata::new(object_meta.clone(), Arc::new(stats.clone()), None); + (object_meta, value) + } } diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index 67604c424c766..e393a7a127873 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -103,6 +103,7 @@ fn create_runtime_config_entries( metadata_cache_limit: Option, list_files_cache_limit: Option, list_files_cache_ttl: Option, + file_statistics_cache_limit: Option, ) -> Vec { vec![ ConfigEntry { @@ -135,6 +136,11 @@ fn create_runtime_config_entries( value: list_files_cache_ttl, description: "TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes.", }, + ConfigEntry { + key: "datafusion.runtime.file_statistics_cache_limit".to_string(), + value: file_statistics_cache_limit, + description: "Maximum memory to use for file statistics cache. 
Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.", + }, ] } @@ -296,6 +302,14 @@ impl RuntimeEnv { .get_list_files_cache_ttl() .map(format_duration); + let file_statistics_cache_limit = + self.cache_manager.get_file_statistic_cache_limit(); + let file_statistics_cache_value = format_byte_size( + file_statistics_cache_limit + .try_into() + .expect("File statistics cache size conversion failed"), + ); + create_runtime_config_entries( memory_limit_value, Some(max_temp_dir_value), @@ -303,6 +317,7 @@ impl RuntimeEnv { Some(metadata_cache_value), Some(list_files_cache_value), list_files_cache_ttl, + Some(file_statistics_cache_value), ) } } @@ -438,6 +453,11 @@ impl RuntimeEnvBuilder { self } + pub fn with_file_statistics_cache_limit(mut self, limit: usize) -> Self { + self.cache_manager = self.cache_manager.with_file_statistics_cache_limit(limit); + self + } + /// Build a RuntimeEnv pub fn build(self) -> Result { let Self { @@ -475,9 +495,10 @@ impl RuntimeEnvBuilder { /// Create a new RuntimeEnvBuilder from an existing RuntimeEnv pub fn from_runtime_env(runtime_env: &RuntimeEnv) -> Self { let cache_config = CacheManagerConfig { - table_files_statistics_cache: runtime_env + file_statistics_cache: runtime_env.cache_manager.get_file_statistic_cache(), + file_statistics_cache_limit: runtime_env .cache_manager - .get_file_statistic_cache(), + .get_file_statistic_cache_limit(), list_files_cache: runtime_env.cache_manager.get_list_files_cache(), list_files_cache_limit: runtime_env .cache_manager @@ -514,6 +535,7 @@ impl RuntimeEnvBuilder { Some("50M".to_owned()), Some("1M".to_owned()), None, + Some("1M".to_owned()), ) } diff --git a/datafusion/sqllogictest/test_files/encrypted_parquet.slt b/datafusion/sqllogictest/test_files/encrypted_parquet.slt index d580b7d1ad2b8..fd375778b7a53 100644 --- a/datafusion/sqllogictest/test_files/encrypted_parquet.slt +++ b/datafusion/sqllogictest/test_files/encrypted_parquet.slt @@ -77,6 +77,10 
@@ ORDER BY double_field 3 4 5 6 +# Disable file statistics cache because file statistics have been previously created +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + statement count 0 CREATE EXTERNAL TABLE parquet_table ( diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b04c78bd2774c..b4faa414e3acb 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -331,6 +331,7 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.subset_repartition_threshold 4 datafusion.optimizer.top_down_join_key_reordering true +datafusion.runtime.file_statistics_cache_limit 1M datafusion.optimizer.use_statistics_registry false datafusion.runtime.list_files_cache_limit 1M datafusion.runtime.list_files_cache_ttl NULL @@ -478,6 +479,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.subset_repartition_threshold 4 Partition count threshold for subset satisfaction optimization. When the current partition count is >= this threshold, DataFusion will skip repartitioning if the required partitioning expression is a subset of the current partition expression such as Hash(a) satisfies Hash(a, b). When the current partition count is < this threshold, DataFusion will repartition to increase parallelism even when subset satisfaction applies. Set to 0 to always repartition (disable subset satisfaction optimization). Set to a high value to always use subset satisfaction. 
Example (subset_repartition_threshold = 4): ```text Hash([a]) satisfies Hash([a, b]) because (Hash([a, b]) is subset of Hash([a]) If current partitions (3) < threshold (4), repartition: AggregateExec: mode=FinalPartitioned, gby=[a, b], aggr=[SUM(x)] RepartitionExec: partitioning=Hash([a, b], 8), input_partitions=3 AggregateExec: mode=Partial, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 3) If current partitions (8) >= threshold (4), use subset satisfaction: AggregateExec: mode=SinglePartitioned, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 8) ``` datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys +datafusion.runtime.file_statistics_cache_limit 1M Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.optimizer.use_statistics_registry false When set to true, the physical plan optimizer uses the pluggable `StatisticsRegistry` for statistics propagation across operators. This enables more accurate cardinality estimates compared to each operator's built-in `partition_statistics`. datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.runtime.list_files_cache_ttl NULL TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. 
diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index a4a613e383ec8..53ec7e72d9f16 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -177,6 +177,10 @@ physical_plan statement ok DROP TABLE test_table; +# Disable file statistics cache because file statistics have been previously created +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + statement ok CREATE EXTERNAL TABLE test_table ( partition_col TEXT NOT NULL, diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index f270b9b169572..42a12ab6a7542 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -351,6 +351,12 @@ RESET datafusion.runtime.memory_limit statement ok EXPLAIN ANALYZE SELECT * FROM generate_series(1, 1000) AS t1(v1) ORDER BY v1 +statement ok +SET datafusion.runtime.file_statistics_cache_limit = '1K' + +statement ok +RESET datafusion.runtime.file_statistics_cache_limit + statement ok SET datafusion.runtime.list_files_cache_limit = '1K' @@ -605,6 +611,15 @@ SHOW datafusion.runtime.max_temp_directory_size ---- datafusion.runtime.max_temp_directory_size 10G +# Test SET and SHOW runtime.file_statistics_cache_limit +statement ok +SET datafusion.runtime.file_statistics_cache_limit = '42M' + +query TT +SHOW datafusion.runtime.file_statistics_cache_limit +---- +datafusion.runtime.file_statistics_cache_limit 42M + # Test SET and SHOW runtime.metadata_cache_limit statement ok SET datafusion.runtime.metadata_cache_limit = '200M' @@ -639,6 +654,7 @@ datafusion.runtime.list_files_cache_ttl 1m30s query T SELECT name FROM information_schema.df_settings WHERE name LIKE 'datafusion.runtime.%' ORDER BY name ---- +datafusion.runtime.file_statistics_cache_limit
datafusion.runtime.list_files_cache_limit datafusion.runtime.list_files_cache_ttl datafusion.runtime.max_temp_directory_size From 1f526313a8dab89d8d5a296a9c393c83b98a2059 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 28 Jan 2026 17:00:13 +0100 Subject: [PATCH 02/70] fixup! Add a default FileStatisticsCache implementation for the ListingTable --- docs/source/user-guide/configs.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 46039f3c99c27..6a8014ddf1d8f 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -229,14 +229,15 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: -| key | default | description | -| ------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | -| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.memory_limit | NULL | Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use for file metadata cache such as Parquet metadata. 
Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.temp_directory | NULL | The path to the temporary file directory. | +| key | default | description | +| ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| datafusion.runtime.file_statistics_cache_limit | 1M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | +| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.memory_limit | NULL | Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.temp_directory | NULL | The path to the temporary file directory. 
| # Tuning Guide From fff10b96d80da016848da1882cf5b0b3e36fc9af Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 12:59:27 +0100 Subject: [PATCH 03/70] Adapt memory usage when removing entries --- datafusion/execution/src/cache/cache_unit.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 4e2b1eeac988e..fe7950c218593 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -112,7 +112,12 @@ impl DefaultFileStatisticsCacheState { } fn remove(&mut self, k: &Path) -> Option { - self.lru_queue.remove(k) + if let Some(old_entry) = self.lru_queue.remove(k) { + self.memory_used -= old_entry.heap_size(); + Some(old_entry) + } else { + None + } } fn contains_key(&self, k: &Path) -> bool { @@ -561,11 +566,14 @@ mod tests { assert_eq!(result_3.unwrap(), value_3); cache.remove(&meta_2.location); - assert_eq!(cache.len(), 1); + assert_eq!(cache.memory_used(), value_3.heap_size()); + cache.clear(); assert_eq!(cache.len(), 0); + assert_eq!(cache.memory_used(), 0); + } #[test] From 022a9be3c3013f6399bcf399a852295759a23ff0 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 13:00:37 +0100 Subject: [PATCH 04/70] Adapt heapsize for &str --- datafusion/common/src/heap_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 6dee7d5c0a373..479e6aa85f73b 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -314,7 +314,7 @@ impl DFHeapSize for String { impl DFHeapSize for str { fn heap_size(&self) -> usize { - self.to_string().capacity() + self.as_bytes().len() } } From 22195c93a6ade9f5042f1de60c2320bd0f22215f Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 13:08:03 +0100 Subject: [PATCH 05/70] Fix formatting --- 
datafusion/execution/src/cache/cache_unit.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index fe7950c218593..6db79ea7b16f5 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -569,11 +569,9 @@ mod tests { assert_eq!(cache.len(), 1); assert_eq!(cache.memory_used(), value_3.heap_size()); - cache.clear(); assert_eq!(cache.len(), 0); assert_eq!(cache.memory_used(), 0); - } #[test] From 385a7b775afb8396d0a8a8f420150e05eb5b45c1 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 13:08:26 +0100 Subject: [PATCH 06/70] Adapt heapsize for &str and add another scalarvalue --- datafusion/common/src/heap_size.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 479e6aa85f73b..ca9c03894c23d 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -125,6 +125,7 @@ impl DFHeapSize for ScalarValue { DurationNanosecond(d) => d.heap_size(), Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), Dictionary(a, b) => a.heap_size() + b.heap_size(), + RunEndEncoded(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), } } } @@ -314,7 +315,7 @@ impl DFHeapSize for String { impl DFHeapSize for str { fn heap_size(&self) -> usize { - self.as_bytes().len() + self.len() } } From cd1a002f9f3c609f03effdaa9ecb6b59fc1d32d2 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:33:39 +0100 Subject: [PATCH 07/70] Add better error message --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 6db79ea7b16f5..512f8c94d7efd 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ 
b/datafusion/execution/src/cache/cache_unit.rs @@ -141,7 +141,7 @@ impl DefaultFileStatisticsCacheState { // cache is empty while memory_used > memory_limit, cannot happen debug_assert!( false, - "cache is empty while memory_used > memory_limit, cannot happen" + "This is a bug! Please report it to the Apache DataFusion developers" ); return; } From 370441b321f5fcf6d3a1c3b70f746b5c7775bc08 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:35:33 +0100 Subject: [PATCH 08/70] Add todo to add heapsize for ordering in CachedFileMetadata --- datafusion/execution/src/cache/cache_manager.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 7de993210b7b7..da1e1081da0b6 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -113,6 +113,7 @@ impl DFHeapSize for CachedFileMetadata { + self.meta.e_tag.heap_size() + self.meta.location.as_ref().heap_size() + self.statistics.heap_size() + //TODO add ordering once LexOrdering /PhysicalExpr implements DFHeapSize } } From 431aea19617813ee95de74dc3d85ca9286daa7e9 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:39:31 +0100 Subject: [PATCH 09/70] Fix comment/docs on DefaultFileStatisticsCache --- datafusion/execution/src/cache/cache_unit.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 512f8c94d7efd..8e34950a701b6 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -16,9 +16,7 @@ // under the License. 
use crate::cache::CacheAccessor; -use crate::cache::cache_manager::{ - CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, -}; +use crate::cache::cache_manager::{CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry}; use std::collections::HashMap; use std::sync::Mutex; @@ -36,7 +34,13 @@ use object_store::path::Path; /// 2. If `Some(cached)`, validate with `cached.is_valid_for(&current_meta)` /// 3. If invalid or missing, compute new value and call `put(path, new_value)` /// -/// Uses DashMap for lock-free concurrent access. +/// # Internal details +/// +/// The `memory_limit` controls the maximum size of the cache, which uses a +/// Least Recently Used eviction algorithm. When adding a new entry, if the total +/// size of the cached entries exceeds `memory_limit`, the least recently used entries +/// are evicted until the total size is lower than `memory_limit`. +/// /// /// [`FileStatisticsCache`]: crate::cache::cache_manager::FileStatisticsCache #[derive(Default)] From 490f4d3d7a110d9756d2c54def7005b8f4441bea Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:44:20 +0100 Subject: [PATCH 10/70] Simplify test data generation --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 8e34950a701b6..9f26f6104cc2f 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -596,7 +596,7 @@ mod tests { fn create_cached_file_metadata_with_stats( file_name: &str, ) -> (ObjectMeta, CachedFileMetadata) { - let series: Vec = (0..=10).step_by(1).collect(); + let series: Vec = (0..=10).collect(); let values = Int32Array::from(series); let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0])); let field = Arc::new(Field::new_list_field(DataType::Int32, false)); From 24c71a7874f1bafcce45704a1498e23f941d55b0 Mon
Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:47:07 +0100 Subject: [PATCH 11/70] Remove potential stale entry, if entry is too large --- datafusion/execution/src/cache/cache_unit.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 9f26f6104cc2f..9166f4c3da685 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -100,6 +100,8 @@ impl DefaultFileStatisticsCacheState { let entry_size = value.heap_size(); if entry_size > self.memory_limit { + // Remove stale entry if exists + self.remove(key); return None; } From 2297c459f23fc436e814c71ac3bf7634a6cadd73 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:48:06 +0100 Subject: [PATCH 12/70] Fix typo in sql logic test comment --- datafusion/sqllogictest/test_files/set_variable.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 42a12ab6a7542..aea9b9aeea41f 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -611,7 +611,7 @@ SHOW datafusion.runtime.max_temp_directory_size ---- datafusion.runtime.max_temp_directory_size 10G -# Test SET and SHOW rruntime.file_statistics_cache_limit +# Test SET and SHOW runtime.file_statistics_cache_limit statement ok SET datafusion.runtime.file_statistics_cache_limit = '42M' From 72e866a0ca56202201244d3fac90e3bcffa49a32 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:52:36 +0100 Subject: [PATCH 13/70] Fix comment about default behaviour in cache manager --- datafusion/execution/src/cache/cache_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 
da1e1081da0b6..3ec0c1898b8bf 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -449,7 +449,7 @@ pub const DEFAULT_METADATA_CACHE_LIMIT: usize = 50 * 1024 * 1024; // 50M pub struct CacheManagerConfig { /// Enable caching of file statistics when listing files. /// Enabling the cache avoids repeatedly reading file statistics in a DataFusion session. - /// Default is disabled. Currently only Parquet files are supported. + /// Default is enabled with 1MiB. Currently only Parquet files are supported. pub file_statistics_cache: Option>, /// Limit of the file statistics cache, in bytes. Default: 1MiB. pub file_statistics_cache_limit: usize, From 9ae219ab388f01d65825cdd7173b60a97b142183 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 11:05:30 +0100 Subject: [PATCH 14/70] Fix variable name in test --- datafusion/core/tests/sql/runtime_config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 6b7f0568309ce..9f8c2575d7530 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -260,11 +260,11 @@ async fn test_test_metadata_cache_limit() { #[tokio::test] async fn test_list_files_cache_limit() { - let list_files_cache = Arc::new(DefaultListFilesCache::default()); + let file_statistics_cache = Arc::new(DefaultListFilesCache::default()); let rt = RuntimeEnvBuilder::new() .with_cache_manager( - CacheManagerConfig::default().with_list_files_cache(Some(list_files_cache)), + CacheManagerConfig::default().with_list_files_cache(Some(file_statistics_cache)), ) .build_arc() .unwrap(); From 556ec42a3e87ec4ec2ad38b4db61ec250b8eb3ea Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 11:07:29 +0100 Subject: [PATCH 15/70] Fix variable name in test --- datafusion/core/tests/sql/runtime_config.rs | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 9f8c2575d7530..5998148c42d0b 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -260,11 +260,11 @@ async fn test_test_metadata_cache_limit() { #[tokio::test] async fn test_list_files_cache_limit() { - let file_statistics_cache = Arc::new(DefaultListFilesCache::default()); + let list_files_cache = Arc::new(DefaultListFilesCache::default()); let rt = RuntimeEnvBuilder::new() .with_cache_manager( - CacheManagerConfig::default().with_list_files_cache(Some(file_statistics_cache)), + CacheManagerConfig::default().with_list_files_cache(Some(list_files_cache)), ) .build_arc() .unwrap(); @@ -347,12 +347,12 @@ async fn test_list_files_cache_ttl() { #[tokio::test] async fn test_file_statistics_cache_limit() { - let list_files_cache = Arc::new(DefaultFileStatisticsCache::default()); + let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); let rt = RuntimeEnvBuilder::new() .with_cache_manager( CacheManagerConfig::default() - .with_file_statistics_cache(Some(list_files_cache)), + .with_file_statistics_cache(Some(file_statistics_cache)), ) .build_arc() .unwrap(); From 4cd48b35c1f8f7f59f031946af430cd81fd10ed3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 12:31:05 +0100 Subject: [PATCH 16/70] Disable cache for sql logic test --- datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index 85f9549357138..80a1a838cb7e9 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -37,6 +37,9 @@ COPY ( ) TO 
'test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet' STORED AS PARQUET; +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + ## Create table without filter pushdown ## (pushdown setting is part of the table, but is copied from the session settings) From c4e224082102fd4e87e704e49e17b08dfd349695 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 15:35:43 +0100 Subject: [PATCH 17/70] Include key into memory estimation --- datafusion/common/Cargo.toml | 2 +- datafusion/common/src/heap_size.rs | 7 +++++++ datafusion/execution/src/cache/cache_unit.rs | 11 +++++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 740d4e45b8d05..e8b03bedc57e4 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -77,7 +77,7 @@ indexmap = { workspace = true } itertools = { workspace = true } libc = "0.2.185" log = { workspace = true } -object_store = { workspace = true, optional = true } +object_store = { workspace = true, optional = true, default-features = true } parquet = { workspace = true, optional = true, default-features = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index ca9c03894c23d..c9f6b4671e1f5 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -29,6 +29,7 @@ use half::f16; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; +use object_store::path::Path; /// This is a temporary solution until and /// are resolved. 
@@ -179,6 +180,12 @@ impl DFHeapSize for DataType { } } +impl DFHeapSize for Path { + fn heap_size(&self) -> usize { + self.as_ref().heap_size() + } +} + impl DFHeapSize for Vec { fn heap_size(&self) -> usize { let item_size = size_of::(); diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 9166f4c3da685..90dbaff621810 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -97,16 +97,17 @@ impl DefaultFileStatisticsCacheState { key: &Path, value: CachedFileMetadata, ) -> Option { + let key_size = key.heap_size(); let entry_size = value.heap_size(); - if entry_size > self.memory_limit { + if entry_size + key_size > self.memory_limit { // Remove stale entry if exists self.remove(key); return None; } let old_value = self.lru_queue.put(key.clone(), value); - self.memory_used += entry_size; + self.memory_used += entry_size + key_size; if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); @@ -119,6 +120,7 @@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &Path) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { + self.memory_used -= k.heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -142,6 +144,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { + self.memory_used -= removed.0.heap_size(); self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen @@ -541,7 +544,7 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = value_1.heap_size() + value_2.heap_size(); + let limit_for_2_entries = &meta_1.location.heap_size() + value_1.heap_size() + 
&meta_2.location.heap_size() + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); @@ -573,7 +576,7 @@ mod tests { cache.remove(&meta_2.location); assert_eq!(cache.len(), 1); - assert_eq!(cache.memory_used(), value_3.heap_size()); + assert_eq!(cache.memory_used(), &meta_3.location.heap_size() + value_3.heap_size()); cache.clear(); assert_eq!(cache.len(), 0); From 6c6be163bc9cd9d3b70af246f73052906e17ab41 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 15:45:29 +0100 Subject: [PATCH 18/70] Fix fmt --- datafusion/common/src/heap_size.rs | 4 ++-- datafusion/execution/src/cache/cache_unit.rs | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index c9f6b4671e1f5..ad0855bbe9d4c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -26,10 +26,10 @@ use arrow::datatypes::{ }; use chrono::{DateTime, Utc}; use half::f16; +use object_store::path::Path; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; -use object_store::path::Path; /// This is a temporary solution until and /// are resolved. @@ -182,7 +182,7 @@ impl DFHeapSize for DataType { impl DFHeapSize for Path { fn heap_size(&self) -> usize { - self.as_ref().heap_size() + self.as_ref().heap_size() } } diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 90dbaff621810..9f13c9b8e7418 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -16,14 +16,16 @@ // under the License. 
use crate::cache::CacheAccessor; -use crate::cache::cache_manager::{CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry}; +use crate::cache::cache_manager::{ + CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry, +}; +use object_store::path::Path; use std::collections::HashMap; use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; use datafusion_common::heap_size::DFHeapSize; -use object_store::path::Path; /// Default implementation of [`FileStatisticsCache`] /// @@ -544,7 +546,10 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = &meta_1.location.heap_size() + value_1.heap_size() + &meta_2.location.heap_size() + value_2.heap_size(); + let limit_for_2_entries = &meta_1.location.heap_size() + + value_1.heap_size() + + &meta_2.location.heap_size() + + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); @@ -576,7 +581,10 @@ mod tests { cache.remove(&meta_2.location); assert_eq!(cache.len(), 1); - assert_eq!(cache.memory_used(), &meta_3.location.heap_size() + value_3.heap_size()); + assert_eq!( + cache.memory_used(), + &meta_3.location.heap_size() + value_3.heap_size() + ); cache.clear(); assert_eq!(cache.len(), 0); From 8a61d439aa3243f7a3ece35a1b257958cbf7830d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 15:48:51 +0100 Subject: [PATCH 19/70] Fix clippy --- datafusion/execution/src/cache/cache_unit.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 9f13c9b8e7418..a2e747e099622 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ 
b/datafusion/execution/src/cache/cache_unit.rs @@ -17,7 +17,7 @@ use crate::cache::CacheAccessor; use crate::cache::cache_manager::{ - CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry, + CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; use object_store::path::Path; use std::collections::HashMap; @@ -546,9 +546,9 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = &meta_1.location.heap_size() + let limit_for_2_entries = meta_1.location.heap_size() + value_1.heap_size() - + &meta_2.location.heap_size() + + meta_2.location.heap_size() + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries @@ -583,7 +583,7 @@ mod tests { assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), - &meta_3.location.heap_size() + value_3.heap_size() + meta_3.location.heap_size() + value_3.heap_size() ); cache.clear(); From f4e196bd89fc52d10b6e0ee4a4621b650f549c33 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 16:09:38 +0100 Subject: [PATCH 20/70] minor --- datafusion/common/src/heap_size.rs | 8 ++++---- datafusion/execution/src/cache/cache_manager.rs | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index ad0855bbe9d4c..d510d3389d4c7 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -170,10 +170,10 @@ impl DFHeapSize for DataType { Struct(s) => s.heap_size(), Union(u, m) => u.heap_size() + m.heap_size(), Dictionary(a, b) => a.heap_size() + b.heap_size(), - Decimal32(u8, i8) => u8.heap_size() + i8.heap_size(), - Decimal64(u8, i8) => u8.heap_size() + i8.heap_size(), - Decimal128(u8, i8) => u8.heap_size() + i8.heap_size(), - Decimal256(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal32(p, s) => 
p.heap_size() + s.heap_size(), + Decimal64(p, s) => p.heap_size() + s.heap_size(), + Decimal128(p, s) => p.heap_size() + s.heap_size(), + Decimal256(p, s) => p.heap_size() + s.heap_size(), Map(m, b) => m.heap_size() + b.heap_size(), RunEndEncoded(a, b) => a.heap_size() + b.heap_size(), } diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 3ec0c1898b8bf..933c7df8c7646 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -113,7 +113,7 @@ impl DFHeapSize for CachedFileMetadata { + self.meta.e_tag.heap_size() + self.meta.location.as_ref().heap_size() + self.statistics.heap_size() - //TODO add ordering once LexOrdering /PhysicalExpr implements DFHeapSize + //TODO add ordering once LexOrdering/PhysicalExpr implements DFHeapSize } } @@ -501,6 +501,7 @@ impl CacheManagerConfig { self } + /// Specifies the memory limit for the file statistics cache, in bytes. pub fn with_file_statistics_cache_limit(mut self, limit: usize) -> Self { self.file_statistics_cache_limit = limit; self From b8ecb3e622042669fd9d881d51f7df9260c25cf9 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 09:13:37 +0100 Subject: [PATCH 21/70] Add more key memory accounting --- datafusion/execution/src/cache/cache_unit.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index a2e747e099622..988d9886241f9 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -103,16 +103,18 @@ impl DefaultFileStatisticsCacheState { let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { - // Remove stale entry if exists + // Remove potential stale entry self.remove(key); return None; } let old_value = self.lru_queue.put(key.clone(), value); - self.memory_used += entry_size + key_size; 
+ self.memory_used += entry_size; if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); + } else { + self.memory_used += key.heap_size(); } self.evict_entries(); From c373bd93f09b2b4fdbb7b35e24faefa763a4190e Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 09:28:48 +0100 Subject: [PATCH 22/70] Fix Formatting --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 988d9886241f9..a7def2c3d89df 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -103,7 +103,7 @@ impl DefaultFileStatisticsCacheState { let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { - // Remove potential stale entry + // Remove potential stale entry self.remove(key); return None; } From 2dbbb6b4083017ed3043173e8347c66381ecc863 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 14:38:00 +0100 Subject: [PATCH 23/70] Account path as string and remove dependency to object_store --- datafusion/common/Cargo.toml | 2 +- datafusion/common/src/heap_size.rs | 7 ------- datafusion/execution/src/cache/cache_unit.rs | 14 +++++++------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index e8b03bedc57e4..740d4e45b8d05 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -77,7 +77,7 @@ indexmap = { workspace = true } itertools = { workspace = true } libc = "0.2.185" log = { workspace = true } -object_store = { workspace = true, optional = true, default-features = true } +object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } diff --git 
a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index d510d3389d4c7..f5aa4704be15c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -26,7 +26,6 @@ use arrow::datatypes::{ }; use chrono::{DateTime, Utc}; use half::f16; -use object_store::path::Path; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; @@ -180,12 +179,6 @@ impl DFHeapSize for DataType { } } -impl DFHeapSize for Path { - fn heap_size(&self) -> usize { - self.as_ref().heap_size() - } -} - impl DFHeapSize for Vec { fn heap_size(&self) -> usize { let item_size = size_of::(); diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index a7def2c3d89df..1c047ed74e974 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -99,7 +99,7 @@ impl DefaultFileStatisticsCacheState { key: &Path, value: CachedFileMetadata, ) -> Option { - let key_size = key.heap_size(); + let key_size = key.as_ref().heap_size(); let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { @@ -114,7 +114,7 @@ impl DefaultFileStatisticsCacheState { if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); } else { - self.memory_used += key.heap_size(); + self.memory_used += key.as_ref().heap_size(); } self.evict_entries(); @@ -124,7 +124,7 @@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &Path) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.heap_size(); + self.memory_used -= k.as_ref().heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -148,7 +148,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.heap_size(); + self.memory_used -= removed.0.as_ref().heap_size(); 
self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen @@ -548,9 +548,9 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = meta_1.location.heap_size() + let limit_for_2_entries = meta_1.location.as_ref().heap_size() + value_1.heap_size() - + meta_2.location.heap_size() + + meta_2.location.as_ref().heap_size() + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries @@ -585,7 +585,7 @@ mod tests { assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), - meta_3.location.heap_size() + value_3.heap_size() + meta_3.location.as_ref().heap_size() + value_3.heap_size() ); cache.clear(); From 03a73cf539822ac369665caf1ed7fc10af6a79ef Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:15:04 +0100 Subject: [PATCH 24/70] Improve error handling --- datafusion/execution/src/cache/cache_unit.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 1c047ed74e974..6e2540bd4a4c9 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -64,7 +64,7 @@ impl DefaultFileStatisticsCache { } } -pub struct DefaultFileStatisticsCacheState { +struct DefaultFileStatisticsCacheState { lru_queue: LruQueue, memory_limit: usize, memory_used: usize, @@ -152,10 +152,17 @@ impl DefaultFileStatisticsCacheState { self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen + log::error!( + "File statistics cache memory accounting bug: memory_used={} but cache is empty. \ + Please report this to the Apache DataFusion developers.", + self.memory_used + ); debug_assert!( false, - "This is a bug! 
Please report it to the Apache DataFusion developers" + "memory_used={} but cache is empty", + self.memory_used ); + self.memory_used = 0; return; } } From a227e300e67dcc2f8745d90db97ee0bae7e397a4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:15:30 +0100 Subject: [PATCH 25/70] Fix fmt --- datafusion/execution/src/cache/cache_manager.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 933c7df8c7646..e02a4763ba0cc 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -96,6 +96,7 @@ impl CachedFileMetadata { /// /// See [`crate::runtime_env::RuntimeEnv`] for more details pub trait FileStatisticsCache: CacheAccessor { + /// Cache memory limit in bytes. fn cache_limit(&self) -> usize; /// Updates the cache with a new memory limit in bytes. From e45c1474ba7a1e27ae50f517d86a81764208261e Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:18:30 +0100 Subject: [PATCH 26/70] Remove path.clone --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 6e2540bd4a4c9..2887d3ca090be 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -222,7 +222,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { let path = entry.0.clone(); let cached = entry.1.clone(); entries.insert( - path.clone(), + path, FileStatisticsCacheEntry { object_meta: cached.meta.clone(), num_rows: cached.statistics.num_rows, From a884758122482637ec46e77d5b5a9b853f097aaf Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:19:00 +0100 Subject: [PATCH 27/70] Simplify accounting for statistics --- datafusion/common/src/heap_size.rs | 6 +----- 1 file changed, 1 
insertion(+), 5 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index f5aa4704be15c..c39b6de7daf5c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -46,11 +46,7 @@ impl DFHeapSize for Statistics { fn heap_size(&self) -> usize { self.num_rows.heap_size() + self.total_byte_size.heap_size() - + self - .column_statistics - .iter() - .map(|s| s.heap_size()) - .sum::() + + self.column_statistics.heap_size() } } From 88376c88a6f59372de6bc8c00da725485e09911d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:22:06 +0100 Subject: [PATCH 28/70] Adapt offset buffer --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 2887d3ca090be..6529c63a72aca 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -620,7 +620,7 @@ mod tests { ) -> (ObjectMeta, CachedFileMetadata) { let series: Vec = (0..=10).collect(); let values = Int32Array::from(series); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0])); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 11])); let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let list_array = ListArray::new(field, offsets, Arc::new(values), None); From 7dbb125118e99797fbf115e9344f725ce6e83124 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:24:36 +0100 Subject: [PATCH 29/70] Fix heap size for Arc --- datafusion/common/src/heap_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index c39b6de7daf5c..1acc3486eb51c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -230,7 +230,7 @@ impl DFHeapSize for HashMap { impl DFHeapSize for Arc { 
fn heap_size(&self) -> usize { // Arc stores weak and strong counts on the heap alongside an instance of T - 2 * size_of::() + size_of::() + self.as_ref().heap_size() + 2 * size_of::() + self.as_ref().heap_size() } } From 4c581ee0e98511c596162a42343441461fa08479 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 19:59:56 +0100 Subject: [PATCH 30/70] Adapt estimate in test --- datafusion/execution/src/cache/cache_unit.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 6529c63a72aca..1c1b6b9e6e692 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -530,7 +530,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 72, + statistics_size_bytes: 304, has_ordering: false, } ), @@ -541,7 +541,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 72, + statistics_size_bytes: 304, has_ordering: true, } ), From 5b286dfc7640a8668b8c756746be46b9df3e30c4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 20:23:50 +0100 Subject: [PATCH 31/70] Fix sql logic test --- datafusion/sqllogictest/test_files/array.slt | 9949 ++++++++++++++++++ 1 file changed, 9949 insertions(+) create mode 100644 datafusion/sqllogictest/test_files/array.slt diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt new file mode 100644 index 0000000000000..45cf02700c39a --- /dev/null +++ b/datafusion/sqllogictest/test_files/array.slt @@ -0,0 +1,9949 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############# +## Array Expressions Tests +############# + +### Tables + +statement ok +CREATE TABLE values( + a INT, + b INT, + c INT, + d FLOAT, + e VARCHAR, + f VARCHAR +) AS VALUES + (1, 1, 2, 1.1, 'Lorem', 'A'), + (2, 3, 4, 2.2, 'ipsum', ''), + (3, 5, 6, 3.3, 'dolor', 'BB'), + (4, 7, 8, 4.4, 'sit', NULL), + (NULL, 9, 10, 5.5, 'amet', 'CCC'), + (5, NULL, 12, 6.6, ',', 'DD'), + (6, 11, NULL, 7.7, 'consectetur', 'E'), + (7, 13, 14, NULL, 'adipiscing', 'F'), + (8, 15, 16, 8.8, NULL, '') +; + +statement ok +CREATE TABLE values_without_nulls +AS VALUES + (1, 1, 2, 1.1, 'Lorem', 'A'), + (2, 3, 4, 2.2, 'ipsum', ''), + (3, 5, 6, 3.3, 'dolor', 'BB'), + (4, 7, 8, 4.4, 'sit', NULL), + (5, 9, 10, 5.5, 'amet', 'CCC'), + (6, 11, 12, 6.6, ',', 'DD'), + (7, 13, 14, 7.7, 'consectetur', 'E'), + (8, 15, 16, 8.8, 'adipiscing', 'F'), + (9, 17, 18, 9.9, 'elit', '') +; + +statement ok +CREATE TABLE arrays +AS VALUES + (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')), + (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')), + (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')), + (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')), + (NULL, make_array(13.3, 14.4, 15.5), 
make_array('a', 'm', 'e', 't')), + (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')), + (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL) +; + +statement ok +CREATE TABLE large_arrays +AS + SELECT + arrow_cast(column1, 'LargeList(List(Int64))') AS column1, + arrow_cast(column2, 'LargeList(Float64)') AS column2, + arrow_cast(column3, 'LargeList(Utf8)') AS column3 + FROM arrays +; + +statement ok +CREATE TABLE fixed_size_arrays +AS VALUES + (arrow_cast(make_array(make_array(NULL, 2),make_array(3, NULL)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('L', 'o', 'r', 'e', 'm'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(3, 4),make_array(5, 6)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(NULL, 5.5, 6.6), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('i', 'p', NULL, 'u', 'm'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(5, 6),make_array(7, 8)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(7.7, 8.8, 9.9), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('d', NULL, 'l', 'o', 'r'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(10.1, NULL, 12.2), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('s', 'i', 't', 'a', 'b'), 'FixedSizeList(5, Utf8)')), + (NULL, arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), NULL, arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), NULL) +; + 
+statement ok +CREATE TABLE slices +AS VALUES + (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 2, -4), + (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 0, 0), + (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), -4, -7), + (NULL, 4, 5), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), + (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL) +; + +statement ok +CREATE TABLE fixed_slices +AS VALUES + (arrow_cast(make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1), + (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 'FixedSizeList(10, Int64)'), 2, -4), + (arrow_cast(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 0, 0), + (arrow_cast(make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), -4, -7), + (arrow_cast(make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), 'FixedSizeList(10, Int64)'), NULL, 6), + (arrow_cast(make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60),'FixedSizeList(10, Int64)'), 5, NULL) +; + +statement ok +CREATE TABLE arrayspop +AS VALUES + (make_array(1, 2, NULL)), + (make_array(3, 4, 5, NULL)), + (make_array(6, 7, 8, NULL, 9)), + (make_array(NULL, NULL, 100)), + (NULL), + (make_array(NULL, 10, 11, 12)) +; + +statement ok +CREATE TABLE large_arrayspop +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM arrayspop +; + +statement ok +CREATE TABLE nested_arrays +AS VALUES + (make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), make_array(7, 8, 9), 2, make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), make_array(11, 12, 13)), + (make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), make_array(10, 11, 12), 3, make_array([[11, 12, 13], [14, 15, 16]], 
[[17, 18, 19], [20, 21, 22]]), make_array(121, 131, 141)) +; + +statement ok +CREATE TABLE large_nested_arrays +AS + SELECT + arrow_cast(column1, 'LargeList(LargeList(Int64))') AS column1, + arrow_cast(column2, 'LargeList(Int64)') AS column2, + column3, + arrow_cast(column4, 'LargeList(LargeList(List(Int64)))') AS column4, + arrow_cast(column5, 'LargeList(Int64)') AS column5 + FROM nested_arrays +; + +statement ok +CREATE TABLE fixed_size_nested_arrays +AS VALUES + (arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(7, 8, 9), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(11, 12, 13), 'FixedSizeList(3, Int64)')), + (arrow_cast(make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(10, 11, 12), 'FixedSizeList(3, Int64)'), 3, arrow_cast(make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(121, 131, 141), 'FixedSizeList(3, Int64)')) +; + +statement ok +CREATE TABLE arrays_values +AS VALUES + (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'), + (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'), + (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'), + (NULL, 44, 5, '@'), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'), + (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL) +; + +statement ok +CREATE TABLE large_arrays_values +AS SELECT + arrow_cast(column1, 
'LargeList(Int64)') AS column1, + column2, + column3, + column4 +FROM arrays_values +; + +statement ok +CREATE TABLE fixed_arrays_values +AS SELECT + arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1, + column2, + column3, + column4 +FROM arrays_values +; + +statement ok +CREATE TABLE arrays_values_v2 +AS VALUES + (make_array(NULL, 2, 3), make_array(4, 5, NULL), 12, make_array([30, 40, 50])), + (NULL, make_array(7, NULL, 8), 13, make_array(make_array(NULL,NULL,60))), + (make_array(9, NULL, 10), NULL, 14, make_array(make_array(70,NULL,NULL))), + (make_array(NULL, 1), make_array(NULL, 21), NULL, NULL), + (make_array(11, 12), NULL, NULL, NULL), + (NULL, NULL, NULL, NULL) +; + +statement ok +CREATE TABLE large_arrays_values_v2 +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + arrow_cast(column2, 'LargeList(Int64)') AS column2, + column3, + arrow_cast(column4, 'LargeList(LargeList(Int64))') AS column4 +FROM arrays_values_v2 +; + +statement ok +CREATE TABLE flatten_table +AS VALUES + (make_array([1], [2], [3]), make_array([[1, 2, 3]], [[4, 5]], [[6]]), make_array([[[1]]], [[[2, 3]]]), make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4])), + (make_array([1, 2], [3, 4], [5, 6]), make_array([[8]]), make_array([[[1,2]]], [[[3]]]), make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0])) +; + +statement ok +CREATE TABLE large_flatten_table +AS + SELECT + arrow_cast(column1, 'LargeList(LargeList(Int64))') AS column1, + arrow_cast(column2, 'LargeList(LargeList(LargeList(Int64)))') AS column2, + arrow_cast(column3, 'LargeList(LargeList(LargeList(LargeList(Int64))))') AS column3, + arrow_cast(column4, 'LargeList(LargeList(Float64))') AS column4 + FROM flatten_table +; + +statement ok +CREATE TABLE fixed_size_flatten_table +AS VALUES + (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, 
List(List(List(Int64))))'), + arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))') + ), + ( + arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), + arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))') + ) +; + +statement ok +CREATE TABLE array_has_table_1D +AS VALUES + (make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)), + (make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5)) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D +AS VALUES + (arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2, 4, 6, 8, 1, 3, 5), 'FixedSizeList(7, Int64)')), + (arrow_cast(make_array(3, 4, 5), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(2,5), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3, 5, 7, 9, 11, 13), 'FixedSizeList(7, Int64)')) +; + +statement ok +CREATE TABLE array_has_table_1D_Float +AS VALUES + (make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), + (make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D_Float +AS VALUES + (arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 1.0, arrow_cast(make_array(1.0, 2.0, 3.0, 
4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(1.0,3.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 2.22), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 3.33), 'FixedSizeList(2, Float64)')), + (arrow_cast(make_array(3.0, 4.0, 5.0), 'FixedSizeList(3, Float64)'), 2.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(2.0,5.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 1.11), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 3.33), 'FixedSizeList(2, Float64)')) +; + +statement ok +CREATE TABLE array_has_table_1D_Boolean +AS VALUES + (make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), + (make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D_Boolean +AS VALUES + (arrow_cast(make_array(true, true, true), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, true, false, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(false, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)')), + (arrow_cast(make_array(false, false, false), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, false, true, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, true, false), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(true, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(false,false,true), 'FixedSizeList(3, Boolean)')) +; + +statement ok +CREATE TABLE array_has_table_1D_UTF8 +AS VALUES + (make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 
'python'), make_array('data')), + (make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D_UTF8 +AS VALUES + (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'bc', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'datafusion', 'rust'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('data', 'fusion', 'rust'), 'FixedSizeList(3, Utf8)')), + (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'defg', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow', 'python'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)')) +; + +statement ok +CREATE TABLE array_has_table_2D +AS VALUES + (make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), + (make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_2D +AS VALUES + (arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3], [4,5], [6,7]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([4,5], [6,7], [1,2,3]), 'FixedSizeList(3, List(Int64))')), + (arrow_cast(make_array([3,4], [5]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(5, 3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3,4], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))'), 
arrow_cast(make_array([1,2,3], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))')) +; + +statement ok +CREATE TABLE array_has_table_2D_float +AS VALUES + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_2D_Float +AS VALUES + (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.1, 2.2], [3.3], [4.4]), 'FixedSizeList(3, List(Float64))')), + (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))')) +; + +statement ok +CREATE TABLE array_has_table_3D +AS VALUES + (make_array([[1,2]]), make_array([1])), + (make_array([[1,2]]), make_array([1,2])), + (make_array([[1,2]]), make_array([1,2,3])), + (make_array([[1], [2]]), make_array([2])), + (make_array([[1], [2]]), make_array([1], [2])), + (make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])), + (make_array([[1], [2]], [[2], [3]]), make_array([1], [2])) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_3D +AS VALUES + (arrow_cast(make_array([[1,2]], [[3, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2], [3, 4]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2,3], [1]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], 
[2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) +; + +statement ok +CREATE TABLE array_has_table_null +AS VALUES + (make_array(1, 2), 1), + (make_array(1, NULL), 1), + (make_array(3, 4, 5), 2), + (make_array(3, NULL, 5), 2), + (make_array(NULL, NULL, NULL), 2) +; + +statement ok +CREATE TABLE array_has_table_empty +AS VALUES + (make_array(1, 3, 5), 1), + (make_array(), 1), + (NULL, 1) +; + +statement ok +CREATE TABLE array_distinct_table_1D +AS VALUES + (make_array(1, 1, 2, 2, 3)), + (make_array(1, 2, 3, 4, 5)), + (make_array(3, 5, 3, 3, 3)) +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8 +AS VALUES + (make_array('a', 'a', 'bc', 'bc', 'def')), + (make_array('a', 'bc', 'def', 'defg', 'defg')), + (make_array('defg', 'defg', 'defg', 'defg', 'defg')) +; + +statement ok +CREATE TABLE array_distinct_table_2D +AS VALUES + (make_array([1,2], [1,2], [3,4], [3,4], [5,6])), + (make_array([1,2], [3,4], [5,6], [7,8], [9,10])), + (make_array([5,6], [5,6], NULL)) +; + +statement ok +CREATE TABLE array_distinct_table_1D_large +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 +FROM array_distinct_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_distinct_table_2D_fixed +AS VALUES + (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([1,2], [3,4], [5,6], 
[7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 'FixedSizeList(5, List(Int64))')) +; + +statement ok +CREATE TABLE array_intersect_table_1D +AS VALUES + (make_array(1, 2), make_array(1), make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3)), + (make_array(11, 22), make_array(11), make_array(11,22,33), make_array(11,33), make_array(11,33,55), make_array(22,44,66,88,11,33)) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D +AS + SELECT + arrow_cast(column1, 'LargeList(Int64)') as column1, + arrow_cast(column2, 'LargeList(Int64)') as column2, + arrow_cast(column3, 'LargeList(Int64)') as column3, + arrow_cast(column4, 'LargeList(Int64)') as column4, + arrow_cast(column5, 'LargeList(Int64)') as column5, + arrow_cast(column6, 'LargeList(Int64)') as column6 +FROM array_intersect_table_1D +; + +statement ok +CREATE TABLE array_intersect_table_1D_Float +AS VALUES + (make_array(1.0, 2.0), make_array(1.0), make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), + (make_array(3.0, 4.0, 5.0), make_array(2.0), make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D_Float +AS + SELECT + arrow_cast(column1, 'LargeList(Float64)') as column1, + arrow_cast(column2, 'LargeList(Float64)') as column2, + arrow_cast(column3, 'LargeList(Float64)') as column3, + arrow_cast(column4, 'LargeList(Float64)') as column4, + arrow_cast(column5, 'LargeList(Float64)') as column5, + arrow_cast(column6, 'LargeList(Float64)') as column6 +FROM array_intersect_table_1D_Float +; + +statement ok +CREATE TABLE array_intersect_table_1D_Boolean +AS VALUES + (make_array(true, true, true), make_array(false), make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), + (make_array(false, false, false), 
make_array(false), make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D_Boolean +AS + SELECT + arrow_cast(column1, 'LargeList(Boolean)') as column1, + arrow_cast(column2, 'LargeList(Boolean)') as column2, + arrow_cast(column3, 'LargeList(Boolean)') as column3, + arrow_cast(column4, 'LargeList(Boolean)') as column4, + arrow_cast(column5, 'LargeList(Boolean)') as column5, + arrow_cast(column6, 'LargeList(Boolean)') as column6 +FROM array_intersect_table_1D_Boolean +; + +statement ok +CREATE TABLE array_intersect_table_1D_UTF8 +AS VALUES + (make_array('a', 'bc', 'def'), make_array('bc'), make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')), + (make_array('a', 'bc', 'def'), make_array('defg'), make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D_UTF8 +AS + SELECT + arrow_cast(column1, 'LargeList(Utf8)') as column1, + arrow_cast(column2, 'LargeList(Utf8)') as column2, + arrow_cast(column3, 'LargeList(Utf8)') as column3, + arrow_cast(column4, 'LargeList(Utf8)') as column4, + arrow_cast(column5, 'LargeList(Utf8)') as column5, + arrow_cast(column6, 'LargeList(Utf8)') as column6 +FROM array_intersect_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_intersect_table_1D_NULL +AS VALUES + ([1, 2, 2, 3], [2, 3, 4]), + ([2, 3, 3], [3]), + ([3], [3, 3, 4]), + (null, [3, 4]), + ([1, 2], null), + (null, null) +; + +statement ok +CREATE TABLE array_intersect_table_2D +AS VALUES + (make_array([1,2]), make_array([1,3]), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), + (make_array([3,4], [5]), make_array([3,4]), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) +; + 
+statement ok +CREATE TABLE large_array_intersect_table_2D +AS + SELECT + arrow_cast(column1, 'LargeList(List(Int64))') as column1, + arrow_cast(column2, 'LargeList(List(Int64))') as column2, + arrow_cast(column3, 'LargeList(List(Int64))') as column3, + arrow_cast(column4, 'LargeList(List(Int64))') as column4 +FROM array_intersect_table_2D +; + +statement ok +CREATE TABLE array_intersect_table_2D_float +AS VALUES + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) +; + +statement ok +CREATE TABLE large_array_intersect_table_2D_Float +AS + SELECT + arrow_cast(column1, 'LargeList(List(Float64))') as column1, + arrow_cast(column2, 'LargeList(List(Float64))') as column2 +FROM array_intersect_table_2D_Float +; + +statement ok +CREATE TABLE array_intersect_table_3D +AS VALUES + (make_array([[1,2]]), make_array([[1]])), + (make_array([[1,2]]), make_array([[1,2]])) +; + +statement ok +CREATE TABLE large_array_intersect_table_3D +AS + SELECT + arrow_cast(column1, 'LargeList(List(List(Int64)))') as column1, + arrow_cast(column2, 'LargeList(List(List(Int64)))') as column2 +FROM array_intersect_table_3D +; + +statement ok +CREATE TABLE arrays_values_without_nulls +AS VALUES + (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ',', [2,3]), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.', [4,5]), + (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-', [6,7]), + (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9]) +; + +statement ok +CREATE TABLE large_arrays_values_without_nulls +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + column2, + column3, + column4, + arrow_cast(column5, 'LargeList(Int64)') AS column5 +FROM arrays_values_without_nulls +; + +statement ok +CREATE TABLE fixed_size_arrays_values_without_nulls +AS VALUES + (arrow_cast(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 
'FixedSizeList(10, Int64)'), 1, 1, ',', [2,3]), + (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 'FixedSizeList(10, Int64)'), 12, 2, '.', [4,5]), + (arrow_cast(make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 23, 3, '-', [6,7]), + (arrow_cast(make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), 34, 4, 'ok', [8,9]) +; + +statement ok +CREATE TABLE arrays_range +AS VALUES + (3, 10, 2), + (4, 13, 3) +; + +statement ok +CREATE TABLE arrays_with_repeating_elements +AS VALUES + (make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 2, 4, 3), + (make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 4, 7, 2), + (make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 7, 10, 5), + (make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 10, 13, 10) +; + +statement ok +CREATE TABLE large_arrays_with_repeating_elements +AS + SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + column2, + column3, + column4 + FROM arrays_with_repeating_elements +; + +statement ok +CREATE TABLE fixed_arrays_with_repeating_elements +AS VALUES + (arrow_cast(make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 'FixedSizeList(10, Int64)'), 2, 4, 3), + (arrow_cast(make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 'FixedSizeList(10, Int64)'), 4, 7, 2), + (arrow_cast(make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 'FixedSizeList(10, Int64)'), 7, 10, 5), + (arrow_cast(make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 'FixedSizeList(10, Int64)'), 10, 13, 10) +; + +statement ok +CREATE TABLE nested_arrays_with_repeating_elements +AS VALUES + (make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), [4, 5, 6], [10, 11, 12], 3), + (make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), [10, 11, 12], [19, 20, 21], 2), + (make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 
21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), [19, 20, 21], [28, 29, 30], 5), + (make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) +; + +statement ok +CREATE TABLE large_nested_arrays_with_repeating_elements +AS + SELECT + arrow_cast(column1, 'LargeList(List(Int64))') AS column1, + column2, + column3, + column4 + FROM nested_arrays_with_repeating_elements +; + +statement ok +CREATE TABLE fixed_size_nested_arrays_with_repeating_elements +AS VALUES + (arrow_cast(make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(10, List(Int64))'), [4, 5, 6], [10, 11, 12], 3), + (arrow_cast(make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), 'FixedSizeList(10, List(Int64))'), [10, 11, 12], [19, 20, 21], 2), + (arrow_cast(make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), 'FixedSizeList(10, List(Int64))'), [19, 20, 21], [28, 29, 30], 5), + (arrow_cast(make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), 'FixedSizeList(10, List(Int64))'), [28, 29, 30], [28, 29, 30], 10) +; + +statement ok +CREATE TABLE arrays_distance_table +AS VALUES + (make_array(1, 2, 3), make_array(1, 2, 3), make_array(1.1, 2.2, 3.3) , make_array(1.1, NULL, 3.3)), + (make_array(1, 2, 3), make_array(4, 5, 6), make_array(4.4, 5.5, 6.6), make_array(4.4, NULL, 6.6)), + (make_array(1, 2, 3), make_array(7, 8, 9), make_array(7.7, 8.8, 9.9), make_array(7.7, NULL, 9.9)), + (make_array(1, 2, 3), make_array(10, 11, 12), make_array(10.1, 11.2, 12.3), make_array(10.1, NULL, 12.3)) +; + +statement ok +CREATE TABLE 
large_arrays_distance_table +AS + SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + arrow_cast(column2, 'LargeList(Int64)') AS column2, + arrow_cast(column3, 'LargeList(Float64)') AS column3, + arrow_cast(column4, 'LargeList(Float64)') AS column4 +FROM arrays_distance_table +; + +statement ok +CREATE TABLE fixed_size_arrays_distance_table +AS + SELECT + arrow_cast(column1, 'FixedSizeList(3, Int64)') AS column1, + arrow_cast(column2, 'FixedSizeList(3, Int64)') AS column2, + arrow_cast(column3, 'FixedSizeList(3, Float64)') AS column3, + arrow_cast(column4, 'FixedSizeList(3, Float64)') AS column4 +FROM arrays_distance_table +; + +# Disable file statistics cache because file statistics have been previously created +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + + +# Array literal + +## boolean coercion is not supported +query error +select [1, true, null] + +## wrapped in array_length to get deterministic results +query I +SELECT array_length([now()]) +---- +1 + +## array literal with functions +query ? +select [abs(-1.2), sin(-1), log(2), ceil(3.141)] +---- +[1.2, -0.8414709848078965, 0.30102999566398114, 4.0] + +## array literal with nested types +query ??? +select + [struct('foo', 1)], + [struct('foo', [1,2,3])], + [struct('foo', [struct(3, 'x')])] +; +---- +[{c0: foo, c1: 1}] [{c0: foo, c1: [1, 2, 3]}] [{c0: foo, c1: [{c0: 3, c1: x}]}] + +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from arrays; +---- +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) + +# arrays table +query ??? 
+select column1, column2, column3 from arrays; +---- +[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] +[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] +NULL [13.3, 14.4, 15.5] [a, m, e, t] +[[11, 12], [13, 14]] NULL [,] +[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL + +# nested_arrays table +query ??I?? +select column1, column2, column3, column4, column5 from nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [7, 8, 9] 2 [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] [11, 12, 13] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [10, 11, 12] 3 [[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]] [121, 131, 141] + +# values table +query IIIRT +select a, b, c, d, e from values; +---- +1 1 2 1.1 Lorem +2 3 4 2.2 ipsum +3 5 6 3.3 dolor +4 7 8 4.4 sit +NULL 9 10 5.5 amet +5 NULL 12 6.6 , +6 11 NULL 7.7 consectetur +7 13 14 NULL adipiscing +8 15 16 8.8 NULL + +# arrays_values table +query ?IIT +select column1, column2, column3, column4 from arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 12 2 . +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] 34 4 ok +NULL 44 5 @ +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL + +# slices table +query ?II +select column1, column2, column3 from slices; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 2 -4 +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 0 0 +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -4 -7 +NULL 4 5 +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 5 NULL + +query ??I? 
+select column1, column2, column3, column4 from arrays_values_v2; +---- +[NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] +NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] +[9, NULL, 10] NULL 14 [[70, NULL, NULL]] +[NULL, 1] [NULL, 21] NULL NULL +[11, 12] NULL NULL NULL +NULL NULL NULL NULL + +# arrays_values_without_nulls table +query ?IIT +select column1, column2, column3, column4 from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 . +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ok + +# arrays_with_repeating_elements table +query ?III +select column1, column2, column3, column4 from arrays_with_repeating_elements; +---- +[1, 2, 1, 3, 2, 2, 1, 3, 2, 3] 2 4 3 +[4, 4, 5, 5, 6, 5, 5, 5, 4, 4] 4 7 2 +[7, 7, 7, 8, 7, 9, 7, 8, 7, 7] 7 10 5 +[10, 11, 12, 10, 11, 12, 10, 11, 12, 10] 10 13 10 + +# nested_arrays_with_repeating_elements table +query ???I +select column1, column2, column3, column4 from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [4, 5, 6] [10, 11, 12] 3 +[[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [10, 11, 12] [19, 20, 21] 2 +[[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [19, 20, 21] [28, 29, 30] 5 +[[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [28, 29, 30] [37, 38, 39] 10 + + +### Array index + + +## array[i] + +# single index with scalars #1 (positive index) +query IRT +select make_array(1, 2, 3)[1], make_array(1.0, 2.0, 3.0)[2], make_array('h', 'e', 'l', 'l', 'o')[3]; +---- +1 2 l + +# single index with scalars #2 (zero index) +query I +select make_array(1, 
2, 3)[0]; +---- +NULL + +# single index with scalars #3 (negative index) +query IRT +select make_array(1, 2, 3)[-1], make_array(1.0, 2.0, 3.0)[-2], make_array('h', 'e', 'l', 'l', 'o')[-3]; +---- +3 2 l + +# single index with scalars #4 (complex index) +query IRT +select make_array(1, 2, 3)[1 + 2 - 1], make_array(1.0, 2.0, 3.0)[2 * 1 * 0 - 2], make_array('h', 'e', 'l', 'l', 'o')[2 - 3]; +---- +2 2 o + +# single index with columns #1 (positive index) +query ?RT +select column1[2], column2[3], column3[1] from arrays; +---- +[3, NULL] 3.3 L +[5, 6] 6.6 i +[7, 8] 9.9 d +[9, 10] 12.2 s +NULL 15.5 a +[13, 14] NULL , +[NULL, 18] 18.8 NULL + +# single index with columns #2 (zero index) +query ?RT +select column1[0], column2[0], column3[0] from arrays; +---- +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL + +# single index with columns #3 (negative index) +query ?RT +select column1[-2], column2[-3], column3[-1] from arrays; +---- +[NULL, 2] 1.1 m +[3, 4] NULL m +[5, 6] 7.7 r +[7, NULL] 10.1 t +NULL 13.3 t +[11, 12] NULL , +[15, 16] 16.6 NULL + +# single index with columns #4 (complex index) +query ?RT +select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays; +---- +[3, NULL] NULL e +[5, 6] NULL u +[7, 8] NULL o +[9, 10] NULL i +NULL NULL e +[13, 14] NULL NULL +[NULL, 18] NULL NULL + +# TODO: support index as column +# single index with columns #5 (index as column) +# query ? +# select make_array(1, 2, 3, 4, 5)[column2] from arrays_with_repeating_elements; +# ---- + +# TODO: support argument and index as columns +# single index with columns #6 (argument and index as columns) +# query I +# select column1[column2] from arrays_with_repeating_elements; +# ---- + +## array[i:j] + +# multiple index with columns #1 (positive index) +query ??? +select make_array(1, 2, 3)[1:2], make_array(1.0, 2.0, 3.0)[2:3], make_array('h', 'e', 'l', 'l', 'o')[2:4]; +---- +[1, 2] [2.0, 3.0] [e, l, l] + +query ??? 
+select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1:2], + arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[2:3], + arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[2:4] +; +---- +[1, 2] [2, 3] [e, l, l] + +# multiple index with columns #2 (zero index) +query ??? +select make_array(1, 2, 3)[0:0], make_array(1.0, 2.0, 3.0)[0:2], make_array('h', 'e', 'l', 'l', 'o')[0:6]; +---- +[] [1.0, 2.0] [h, e, l, l, o] + +query ??? +select arrow_cast([1, 2, 3], 'LargeList(Int64)')[0:0], + arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[0:2], + arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[0:6] +; +---- +[] [1, 2] [h, e, l, l, o] + +query I +select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1]; +---- +1 + +# TODO: support multiple negative index +# multiple index with columns #3 (negative index) +# query II +# select make_array(1, 2, 3)[-3:-1], make_array(1.0, 2.0, 3.0)[-3:-1], make_array('h', 'e', 'l', 'l', 'o')[-2:0]; +# ---- + +# TODO: support complex index +# multiple index with columns #4 (complex index) +# query III +# select make_array(1, 2, 3)[2 + 1 - 1:10], make_array(1.0, 2.0, 3.0)[2 | 2:10], make_array('h', 'e', 'l', 'l', 'o')[6 ^ 6:10]; +# ---- + +# multiple index with columns #1 (positive index) +query ??? +select column1[2:4], column2[1:4], column3[3:4] from arrays; +---- +[[3, NULL]] [1.1, 2.2, 3.3] [r, e] +[[5, 6]] [NULL, 5.5, 6.6] [NULL, u] +[[7, 8]] [7.7, 8.8, 9.9] [l, o] +[[9, 10]] [10.1, NULL, 12.2] [t] +NULL [13.3, 14.4, 15.5] [e, t] +[[13, 14]] NULL [] +[[NULL, 18]] [16.6, 17.7, 18.8] NULL + +# multiple index with columns #2 (zero index) +query ??? 
+select column1[0:5], column2[0:3], column3[0:9] from arrays; +---- +[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] +[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] +NULL [13.3, 14.4, 15.5] [a, m, e, t] +[[11, 12], [13, 14]] NULL [,] +[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL + +# TODO: support negative index +# multiple index with columns #3 (negative index) +# query ?RT +# select column1[-2:-4], column2[-3:-5], column3[-1:-4] from arrays; +# ---- +# [NULL, 2] 1.1 m + +# TODO: support complex index +# multiple index with columns #4 (complex index) +# query ?RT +# select column1[9 - 7:2 + 2], column2[1 * 0:2 * 3], column3[1 + 1 - 0:5 % 3] from arrays; +# ---- + +# TODO: support first index as column +# multiple index with columns #5 (first index as column) +# query ? +# select make_array(1, 2, 3, 4, 5)[column2:4] from arrays_with_repeating_elements +# ---- + +# TODO: support last index as column +# multiple index with columns #6 (last index as column) +# query ?RT +# select make_array(1, 2, 3, 4, 5)[2:column3] from arrays_with_repeating_elements; +# ---- + +# TODO: support argument and indices as column +# multiple index with columns #7 (argument and indices as column) +# query ?RT +# select column1[column2:column3] from arrays_with_repeating_elements; +# ---- + +# array[i:j:k] + +# multiple index with columns #1 (positive index) +query ??? +select make_array(1, 2, 3)[1:2:2], make_array(1.0, 2.0, 3.0)[2:3:2], make_array('h', 'e', 'l', 'l', 'o')[2:4:2]; +---- +[1] [2.0] [e, l] + +# multiple index with columns #2 (zero index) +query ??? +select make_array(1, 2, 3)[0:0:2], make_array(1.0, 2.0, 3.0)[0:2:2], make_array('h', 'e', 'l', 'l', 'o')[0:6:2]; +---- +[] [1.0] [h, l, o] + +#TODO: sqlparser does not support negative index +## multiple index with columns #3 (negative index) +#query ??? 
+#select make_array(1, 2, 3)[-1:-2:-2], make_array(1.0, 2.0, 3.0)[-2:-3:-2], make_array('h', 'e', 'l', 'l', 'o')[-2:-4:-2]; +#---- +#[1] [2.0] [e, l] + +# multiple index with columns #1 (positive index) +query ??? +select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays; +---- +[[3, NULL]] [1.1, 3.3] [r] +[[5, 6]] [NULL, 6.6] [NULL] +[[7, 8]] [7.7, 9.9] [l] +[[9, 10]] [10.1, 12.2] [t] +NULL [13.3, 15.5] [e] +[[13, 14]] NULL [] +[[NULL, 18]] [16.6, 18.8] NULL + +# multiple index with columns #2 (zero index) +query ??? +select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays; +---- +[[NULL, 2]] [1.1, 3.3] [L, r, m] +[[3, 4]] [NULL, 6.6] [i, NULL, m] +[[5, 6]] [7.7, 9.9] [d, l, r] +[[7, NULL]] [10.1, 12.2] [s, t] +NULL [13.3, 15.5] [a, e] +[[11, 12]] NULL [,] +[[15, 16]] [16.6, 18.8] NULL + + +### Array function tests + + +## make_array (aliases: `make_list`) + +# make_array scalar function #1 +query ??? +select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); +---- +[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] + +# make_array scalar function #2 +query ??? +select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); +---- +[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] + +# make_array scalar function #3 +query ?? +select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); +---- +[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] + +# make_array scalar function #4 +query ?? +select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); +---- +[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] + +# make_array scalar function #5 +query ? +select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) +---- +[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] + +# make_array scalar function #6 +query ? 
+select make_array() +---- +[] + +# make_array scalar function #7 +query ?? +select make_array(make_array()), make_array(make_array(make_array())) +---- +[[]] [[[]]] + +# make_list scalar function #8 (function alias: `make_array`) +query ??? +select make_list(1, 2, 3), make_list(1.0, 2.0, 3.0), make_list('h', 'e', 'l', 'l', 'o'); +---- +[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] + +# make_array scalar function with nulls +query ??? +select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); +---- +[1, NULL, 3] [NULL, 2.0, NULL] [h, NULL, l, NULL, o] + +# make_array scalar function with nulls #2 +query ?? +select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); +---- +[1, 2, NULL] [[NULL, 2], [NULL, 3]] + +# make_array scalar function with nulls #3 +query ??? +select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); +---- +[NULL] [NULL, NULL, NULL] [[NULL, NULL], [NULL, NULL]] + +# make_array with 1 columns +query ??? +select make_array(a), make_array(d), make_array(e) from values; +---- +[1] [1.1] [Lorem] +[2] [2.2] [ipsum] +[3] [3.3] [dolor] +[4] [4.4] [sit] +[NULL] [5.5] [amet] +[5] [6.6] [,] +[6] [7.7] [consectetur] +[7] [NULL] [adipiscing] +[8] [8.8] [NULL] + +# make_array with 2 columns #1 +query ?? +select make_array(b, c), make_array(e, f) from values; +---- +[1, 2] [Lorem, A] +[3, 4] [ipsum, ] +[5, 6] [dolor, BB] +[7, 8] [sit, NULL] +[9, 10] [amet, CCC] +[NULL, 12] [,, DD] +[11, NULL] [consectetur, E] +[13, 14] [adipiscing, F] +[15, 16] [NULL, ] + +# make_array with 4 columns +query ? +select make_array(a, b, c, d) from values; +---- +[1.0, 1.0, 2.0, 1.1] +[2.0, 3.0, 4.0, 2.2] +[3.0, 5.0, 6.0, 3.3] +[4.0, 7.0, 8.0, 4.4] +[NULL, 9.0, 10.0, 5.5] +[5.0, NULL, 12.0, 6.6] +[6.0, 11.0, NULL, 7.7] +[7.0, 13.0, 14.0, NULL] +[8.0, 15.0, 16.0, 8.8] + +# make_array with column of list +query ?? 
+select column1, column5 from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 3] +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] [4, 5] +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7] +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9] + +# make array with arrays of different types +query ? +select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) +---- +[[1], [-1]] + +query T +select arrow_typeof(make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)'))); +---- +List(LargeList(Int64)) + + +query ??? +select make_array(column1), + make_array(column1, column5), + make_array(column1, make_array(50,51,52)) +from arrays_values_without_nulls; +---- +[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 3]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [50, 51, 52]] +[[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [4, 5]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [50, 51, 52]] +[[21, 22, 23, 24, 25, 26, 27, 28, 29, 30]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [50, 51, 52]] +[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]] + +## array_element (aliases: array_extract, list_extract, list_element) + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_element' does not support zero arguments +select array_element(); + +# array_element error +query error +select array_element(1, 2); + +# array_element with null +query I +select array_element([1, 2], NULL); +---- +NULL + +query ? 
+select array_element(NULL, 2); +---- +NULL + +# array_element scalar function #1 (with positive index) +query IT +select array_element(make_array(1, 2, 3, 4, 5), 2), array_element(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# array_element scalar function #2 (with positive index; out of bounds) +query IT +select array_element(make_array(1, 2, 3, 4, 5), 7), array_element(make_array('h', 'e', 'l', 'l', 'o'), 11); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); +---- +NULL NULL + +# array_element scalar function #3 (with zero) +query IT +select array_element(make_array(1, 2, 3, 4, 5), 0), array_element(make_array('h', 'e', 'l', 'l', 'o'), 0); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0); +---- +NULL NULL + +# array_element scalar function #4 (with NULL) +query IT +select array_element(make_array(1, 2, 3, 4, 5), NULL), 
array_element(make_array('h', 'e', 'l', 'l', 'o'), NULL); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), NULL); +---- +NULL NULL + +# array_element scalar function #5 (with negative index) +query IT +select array_element(make_array(1, 2, 3, 4, 5), -2), array_element(make_array('h', 'e', 'l', 'l', 'o'), -3); +---- +4 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3); +---- +4 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -3); +---- +4 l + +# array_element scalar function #6 (with negative index; out of bounds) +query IT +select array_element(make_array(1, 2, 3, 4, 5), -11), array_element(make_array('h', 'e', 'l', 'l', 'o'), -7); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -7); +---- +NULL NULL + +# array_element scalar function #7 (nested array) +query ? +select array_element(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1); +---- +[1, 2, 3, 4, 5] + +query ? 
+select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1); +---- +[1, 2, 3, 4, 5] + +query ? +select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'FixedSizeList(2, List(Int64))'), 1); +---- +[1, 2, 3, 4, 5] + +# array_extract scalar function #8 (function alias `array_element`) +query IT +select array_extract(make_array(1, 2, 3, 4, 5), 2), array_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# list_element scalar function #9 (function alias `array_element`) +query IT +select list_element(make_array(1, 2, 3, 4, 5), 2), list_element(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), list_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# list_extract scalar function #10 (function alias `array_element`) +query IT +select list_extract(make_array(1, 2, 3, 4, 5), 2), list_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 
'FixedSizeList(5, Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# array_element with columns +query I +select array_element(column1, column2) from slices; +---- +NULL +12 +NULL +37 +NULL +NULL +55 + +query I +select array_element(arrow_cast(column1, 'LargeList(Int64)'), column2) from slices; +---- +NULL +12 +NULL +37 +NULL +NULL +55 + +query I +select array_element(column1, column2) from fixed_slices; +---- +NULL +12 +NULL +37 +NULL +55 + +# array_element with columns and scalars +query II +select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from slices; +---- +1 3 +2 13 +NULL 23 +2 33 +4 NULL +NULL 43 +5 NULL + +query II +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_element(arrow_cast(column1, 'LargeList(Int64)'), 3) from slices; +---- +1 3 +2 13 +NULL 23 +2 33 +4 NULL +NULL 43 +5 NULL + +query II +select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from fixed_slices; +---- +1 3 +2 13 +NULL 23 +2 33 +NULL 43 +5 NULL + +# array_element of empty array +query T +select coalesce(array_element([], 1), array_element(NULL, 1), 'ok'); +---- +ok + + +## array_max +# array_max scalar function #1 (with positive index) +query I +select array_max(make_array(5, 3, 6, 4)); +---- +6 + +query I +select array_max(make_array(5, 3, 4, NULL, 6, NULL)); +---- +6 + +query ? 
+select array_max(make_array(NULL, NULL)); +---- +NULL + +query T +select array_max(make_array('h', 'e', 'o', 'l', 'l')); +---- +o + +query T +select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); +---- +o + +query B +select array_max(make_array(false, true, false, true)); +---- +true + +query B +select array_max(make_array(false, true, NULL, false, true)); +---- +true + +query D +select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); +---- +1999-05-01 + +query D +select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); +---- +1999-05-01 + +query P +select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); +---- +1995-06-01T00:00:00 + +query P +select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); +---- +1996-10-01T00:00:00 + +query R +select array_max(make_array(5.1, -3.2, 6.3, 4.9)); +---- +6.3 + +query ?I +select input, array_max(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) +---- +[-1, 0, 1] 1 +[9, 10, 11] 11 +[19, 20, 21] 21 +[29, 30, 31] 31 +[NULL, NULL, NULL] NULL + +query II +select array_max(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_max(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +3 1 + +query II +select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +3 1 + +query ? 
+select array_max(make_array()); +---- +NULL + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_max' does not support zero arguments +select array_max(); + +# array_max over multiple rows (exercises the offsets-based iteration) +query I +select array_max(column1) from (values + (make_array(1, 5, 3)), + (make_array(10, 2, 8)), + (NULL), + (make_array(NULL, 7, NULL)), + (make_array(100)) +) as t(column1); +---- +5 +10 +NULL +7 +100 + +# array_max with NaN values (a NaN element is returned as the max whenever one is present) +query R +select array_max(make_array(1.0, 'NaN'::double, 3.0)); +---- +NaN + +query R +select array_max(make_array('NaN'::double, 'NaN'::double)); +---- +NaN + +query R +select array_max(make_array('NaN'::double, NULL)); +---- +NaN + +# array_max with Int32 (exercises a different primitive type than Int64) +query I +select array_max(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); +---- +10 + +## array_min + +query I +select array_min(make_array(5, 3, 6, 4)); +---- +3 + +query I +select array_min(make_array(5, 3, 4, NULL, 6, NULL)); +---- +3 + +query ? 
+select array_min(make_array(NULL, NULL)); +---- +NULL + +query T +select array_min(make_array('h', 'e', 'o', 'l', 'l')); +---- +e + +query T +select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); +---- +e + +query B +select array_min(make_array(false, true, false, true)); +---- +false + +query B +select array_min(make_array(false, true, NULL, false, true)); +---- +false + +query D +select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); +---- +1985-11-01 + +query D +select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); +---- +1993-03-01 + +query P +select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); +---- +1984-10-01T00:00:00 + +query P +select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); +---- +1995-06-01T00:00:00 + +query R +select array_min(make_array(5.1, -3.2, 6.3, 4.9)); +---- +-3.2 + +query ?I +select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) +---- +[-1, 0, 1] -1 +[9, 10, 11] 9 +[19, 20, 21] 19 +[29, 30, 31] 29 +[NULL, NULL, NULL] NULL + +query II +select array_min(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_min(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +1 1 + +query II +select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +1 1 + +query ? 
+select array_min(make_array()); +---- +NULL + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_min' does not support zero arguments +select array_min(); + +# array_min over multiple rows (exercises the offsets-based iteration) +query I +select array_min(column1) from (values + (make_array(1, 5, 3)), + (make_array(10, 2, 8)), + (NULL), + (make_array(NULL, 7, NULL)), + (make_array(100)) +) as t(column1); +---- +1 +2 +NULL +7 +100 + +# array_min with NaN values (NaN is returned as min only when no other non-null value is present) +query R +select array_min(make_array(1.0, 'NaN'::double, 3.0)); +---- +1 + +query R +select array_min(make_array('NaN'::double, 'NaN'::double)); +---- +NaN + +query R +select array_min(make_array('NaN'::double, NULL)); +---- +NaN + +# array_min with Int32 (exercises a different primitive type than Int64) +query I +select array_min(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); +---- +-5 + +# array_min/array_max preserve parameterized primitive metadata +query PPTT +select + array_min(ts_list), + array_max(ts_list), + arrow_typeof(array_min(ts_list)), + arrow_typeof(array_max(ts_list)) +from ( + select arrow_cast( + make_array( + arrow_cast(20, 'Timestamp(Nanosecond, Some("UTC"))'), + arrow_cast(10, 'Timestamp(Nanosecond, Some("UTC"))'), + arrow_cast(30, 'Timestamp(Nanosecond, Some("UTC"))') + ), + 'List(Timestamp(Nanosecond, Some("UTC")))' + ) as ts_list +) t; +---- +1970-01-01T00:00:00.000000010Z 1970-01-01T00:00:00.000000030Z Timestamp(ns, "UTC") Timestamp(ns, "UTC") + +query RRTT +select + array_min(dec_list), + array_max(dec_list), + arrow_typeof(array_min(dec_list)), + arrow_typeof(array_max(dec_list)) +from ( + select arrow_cast( + make_array( + arrow_cast(200, 'Decimal128(20, 4)'), + arrow_cast(100, 'Decimal128(20, 4)'), + arrow_cast(300, 'Decimal128(20, 4)') + ), + 'List(Decimal128(20, 4))' + ) as dec_list +) t; +---- +100 300 Decimal128(20, 4) Decimal128(20, 4) + + +## array_pop_back 
(aliases: `list_pop_back`) + +# array_pop_back scalar function with null +#TODO: https://github.com/apache/datafusion/issues/7142 +# follow clickhouse and duckdb +#query ? +#select array_pop_back(null); +#---- +#NULL + +# array_pop_back scalar function #1 +query ?? +select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [h, e, l, l] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + +# array_pop_back scalar function #2 (after array_pop_back, array is empty) +query ? +select array_pop_back(make_array(1)); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + +# array_pop_back scalar function #3 (array_pop_back the empty array) +query ? +select array_pop_back(array_pop_back(make_array(1))); +---- +[] + +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)'))); +---- +[] + +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + +# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) +query ?? +select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); +---- +[1, 2, 3, 4] [NULL, e, l, NULL] + +query ?? 
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [NULL, e, l, NULL] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [NULL, e, l, NULL] + +# array_pop_back scalar function #5 (array_pop_back the nested arrays) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? 
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL as the second-to-last element) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] + +# array_pop_back scalar function #8 (after array_pop_back, nested array is empty) +query ? +select array_pop_back(make_array(make_array(1, 2, 3))); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + +# array_pop_back with columns +query ? +select array_pop_back(column1) from arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +query ? +select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +query ? +select array_pop_back(column1) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +query ? 
+select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +## array_pop_front (aliases: `list_pop_front`) + +#TODO:https://github.com/apache/datafusion/issues/7142 +# array_pop_front scalar function with null +# follow clickhouse and duckdb +#query ? +#select array_pop_front(null); +#---- +#NULL + +# array_pop_front scalar function #1 +query ?? +select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); +---- +[2, 3, 4, 5] [e, l, l, o] + +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + +# array_pop_front scalar function #2 (after array_pop_front, array is empty) +query ? +select array_pop_front(make_array(1)); +---- +[] + +query ? +select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[] + +query ? +select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + +# array_pop_front scalar function #3 (array_pop_front the empty array) +query ? +select array_pop_front(array_pop_front(make_array(1))); +---- +[] + +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)'))); +---- +[] + +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + +# array_pop_front scalar function #5 (array_pop_front the nested arrays) +query ? 
+select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + +# array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) +query ? +select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_front scalar function #8 (after array_pop_front, nested array is empty) +query ? +select array_pop_front(make_array(make_array(1, 2, 3))); +---- +[] + +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); +---- +[] + +query ? 
+select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + +## array_slice (aliases: list_slice) + +# array_slice scalar function #1 (with positive indexes) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); +---- +[2, 3, 4] [h, e] + +query ???? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 2), + array_slice(make_array(1, 2, 3, 4, 5), 0, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5, 2); +---- +[1, 3, 5] [h, l, o] [1, 3, 5] [h, l, o] + +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, -1); +---- +[] [] + +query error Execution error: array_slice got invalid stride: 0, it cannot be 0 +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 0); + +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 5, 1, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 5, 1, -2); +---- +[5, 3, 1] [o, l, h] + +# Test NULL stride +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, NULL); +---- +NULL NULL + +# Test NULL stride +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1, 5, NULL), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 5, NULL); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); +---- +[2, 3, 4] [h, e] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2, 4), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 1, 2); +---- +[2, 3, 4] [h, e] + +# array_slice scalar function #2 (with positive indexes; full array) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 5); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +# TODO make error message nicer: https://github.com/apache/datafusion/issues/19004 +# Expected output (once supported): +# ---- +# [1, 2, 3, 4, 5] [h, e, l, l, o] +query error Failed to coerce arguments to satisfy a call to 'array_slice' function: +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)'), 0, 6), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'ListView(Utf8)'), 0, 5); + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0, 6), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0, 5); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +# array_slice scalar function #3 (with positive indexes; first index = second index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 4, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 3); +---- +[4] [l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 4, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 3); +---- +[4] [l] + +# array_slice scalar function #4 (with positive indexes; first index > second_index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 4, 1); +---- +[] [] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 4, 1); +---- +[] [] + +# array_slice scalar function #5 (with positive indexes; out of bounds) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 7); +---- +[2, 3, 4, 5] [l, l, o] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 7); +---- +[2, 3, 4, 5] [l, l, o] + +# TODO: Enable once array_slice supports LargeListView types. +# Expected output (once supported): +# ---- +# [2, 3, 4, 5] [l, l, o] +query error Failed to coerce arguments to satisfy a call to 'array_slice' function: +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeListView(Int64)'), 2, 6), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeListView(Utf8)'), 3, 7); + + +# array_slice scalar function #6 (with positive indexes; nested array) +query ? +select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1, 1); +---- +[[1, 2, 3, 4, 5]] + +query ? +select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1, 1); +---- +[[1, 2, 3, 4, 5]] + +# array_slice scalar function #7 (with zero and positive number) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 3); +---- +[1, 2, 3, 4] [h, e, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 3); +---- +[1, 2, 3, 4] [h, e, l] + +# array_slice scalar function #8 (with NULL and positive number) +query ?? 
+select array_slice(make_array(1, 2, 3, 4, 5), NULL, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, 3); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, 3); +---- +NULL NULL + +# array_slice scalar function #9 (with positive number and NULL) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, NULL); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, NULL); +---- +NULL NULL + +# array_slice scalar function #10 (with zero-zero) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 0); +---- +[] [] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 0), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 0); +---- +[] [] + +# array_slice scalar function #11 (with NULL-NULL) +query error +select array_slice(make_array(1, 2, 3, 4, 5), NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL); + +query error +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); + +# array_slice scalar function #12 (with zero and negative number) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, -3); +---- +[1, 2] [h, e, l] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, -3); +---- +[1, 2] [h, e, l] + +# array_slice scalar function #13 (with negative number and NULL) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, NULL); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, NULL); +---- +NULL NULL + +# array_slice scalar function #14 (with NULL and negative number) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), NULL, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, -3); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, -3); +---- +NULL NULL + +# array_slice scalar function #15 (with negative indexes) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -4, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -1); +---- +[2, 3, 4, 5] [l, l, o] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -1); +---- +[2, 3, 4, 5] [l, l, o] + +# array_slice scalar function #16 (with negative indexes; full array, since a `to` index of -1 includes the last element) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -5, -1); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -5, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -5, -1); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +# array_slice scalar function #17 (with negative indexes; first index = second index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -4, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -3); +---- +[2] [l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -3); +---- +[2] [l] + +# array_slice scalar function #18 (with negative indexes; first index > second_index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -4, -6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -6); +---- +[] [] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -6); +---- +[] [] + +# array_slice scalar function #19 (with negative indexes; out of bounds) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -7, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -7, -3); +---- +[1, 2, 3, 4] [h, e, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -7, -2), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7, -3); +---- +[1, 2, 3, 4] [h, e, l] + +# array_slice scalar function #20 (with negative indexes; nested array) +query ?? +select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), -2, -1), array_slice(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), -1, -1); +---- +[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] + +query ?? 
+select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), -2, -1), array_slice(arrow_cast(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), 'LargeList(List(Int64))'), -1, -1); +---- +[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] + + +# array_slice scalar function #21 (with first positive index and last negative index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, -3), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, -2); +---- +[2, 3] [e, l, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, -3), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, -2); +---- +[2, 3] [e, l, l] + +# array_slice scalar function #22 (with first negative index and last positive index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -2, 5), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, 4); +---- +[4, 5] [l, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, 5), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, 4); +---- +[4, 5] [l, l] + +# list_slice scalar function #23 (function alias `array_slice`) +query ?? +select list_slice(make_array(1, 2, 3, 4, 5), 2, 4), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); +---- +[2, 3, 4] [h, e] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); +---- +[2, 3, 4] [h, e] + +# array_slice scalar function #24 (with first negative index larger than len) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -2147483648, 1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), -2147483648, 1); +---- +[1] [h] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -9223372036854775808, 1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -9223372036854775808, 1); +---- +[1] [h] + +# array_slice scalar function #25 (with negative step and equal indexes) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 2, -1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, 2, -1); +---- +[2] [e] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 2, -1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, 2, -1); +---- +[2] [e] + +# array_slice with columns +query ? +select array_slice(column1, column2, column3) from slices; +---- +[NULL] +[12, 13, 14, 15, 16, 17] +[] +[] +NULL +NULL +NULL + +query ? +select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from slices; +---- +[NULL] +[12, 13, 14, 15, 16, 17] +[] +[] +NULL +NULL +NULL + +# TODO: support NULLS in output instead of `[]` +# array_slice with columns and scalars +query ??? +select array_slice(make_array(1, 2, 3, 4, 5), column2, column3), array_slice(column1, 3, column3), array_slice(column1, column2, 5) from slices; +---- +[1] [] [NULL, 2, 3, 4, 5] +[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] +[] [] [21, 22, 23, NULL, 25] +[] [33, 34] [] +[4, 5] NULL NULL +NULL [43, 44, 45, 46] NULL +NULL NULL [55] + +query ??? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), 3, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, 5) from slices; +---- +[1] [] [NULL, 2, 3, 4, 5] +[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] +[] [] [21, 22, 23, NULL, 25] +[] [33, 34] [] +[4, 5] NULL NULL +NULL [43, 44, 45, 46] NULL +NULL NULL [55] + +# Test issue: https://github.com/apache/datafusion/issues/10425 +# `from` may be larger than `to` and `stride` is positive +query ???? 
+select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), + array_slice(a, 3, 2, 1), array_slice(a, 3, 2) + from (values ([1.0, 2.0, 3.0, 3.0]), ([4.0, 5.0, 3.0]), ([6.0])) t(a); +---- +[] [] [] [] +[] [] [] [] +[6.0] [6.0] [] [] + +# array_slice with overlapping nulls across multiple inputs +query ? +select array_slice(column1, column2, column3) from ( + values + (make_array(1, 2, 3), NULL, NULL), + (NULL, NULL, 3), + (NULL, 1, NULL), + (make_array(4, 5, 6), 1, 3) +) as t(column1, column2, column3); +---- +NULL +NULL +NULL +[4, 5, 6] + +query ? +select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from ( + values + (make_array(1, 2, 3), NULL, NULL), + (NULL, NULL, 3), + (NULL, 1, NULL), + (make_array(4, 5, 6), 1, 3) +) as t(column1, column2, column3); +---- +NULL +NULL +NULL +[4, 5, 6] + +# array_slice with overlapping nulls including stride +query ? +select array_slice(column1, column2, column3, column4) from ( + values + (make_array(1, 2, 3, 4, 5), 1, 5, NULL), + (NULL, NULL, 3, 2), + (make_array(1, 2, 3, 4, 5), NULL, NULL, NULL), + (make_array(1, 2, 3, 4, 5), 1, 5, 2) +) as t(column1, column2, column3, column4); +---- +NULL +NULL +NULL +[1, 3, 5] + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_slice' does not support zero arguments +select array_slice(); + +query error Failed to coerce arguments +select array_slice(3.5, NULL, NULL); + +## array_any_value (aliases: list_any_value) + +# Testing with empty arguments should result in an error +query error +select array_any_value(); + +# Testing with non-array arguments should result in an error +query error +select array_any_value(1), array_any_value('a'), array_any_value(NULL); + +# array_any_value scalar function #1 (with null and non-null elements) + +query IT?I +select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')), array_any_value(make_array(NULL, 
NULL)), array_any_value(make_array(NULL, NULL, 1, 2, 3)); +---- +1 h NULL 1 + +query ITITI +select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3), 'LargeList(Int64)'));; +---- +1 h NULL NULL 1 + +query ITITI +select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3, 4), 'FixedSizeList(6, Int64)')); +---- +1 h NULL NULL 1 + +# array_any_value scalar function #2 (with nested array) + +query ? +select array_any_value(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10))); +---- +[NULL, 1, 2, 3, 4, 5] + +query ? +select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'LargeList(List(Int64))')); +---- +[NULL, 1, 2, 3, 4, 5] + +query ? 
+select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'FixedSizeList(3, List(Int64))')); +---- +[NULL, 1, 2, 3, 4, 5] + +# array_any_value scalar function #3 (using function alias `list_any_value`) +query IT +select list_any_value(make_array(NULL, 1, 2, 3, 4, 5)), list_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')); +---- +1 h + +query IT +select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +1 h + +query IT +select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')); +---- +1 h + +# array_any_value with columns + +query I +select array_any_value(column1) from slices; +---- +2 +11 +21 +31 +NULL +41 +51 + +query I +select array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; +---- +2 +11 +21 +31 +NULL +41 +51 + +query I +select array_any_value(column1) from fixed_slices; +---- +2 +11 +21 +31 +41 +51 + +# array_any_value with columns and scalars + +query II +select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from slices; +---- +1 2 +1 11 +1 21 +1 31 +1 NULL +1 41 +1 51 + +query II +select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; +---- +1 2 +1 11 +1 21 +1 31 +1 NULL +1 41 +1 51 + +query II +select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from fixed_slices; +---- +1 2 +1 11 +1 21 +1 31 +1 41 +1 51 + +# make_array with nulls +query ??????? 
+select make_array(make_array('a','b'), null), + make_array(make_array('a','b'), null, make_array('c','d')), + make_array(null, make_array('a','b'), null), + make_array(null, make_array('a','b'), null, null, make_array('c','d')), + make_array(['a', 'bc', 'def'], null, make_array('rust')), + make_array([1,2,3], null, make_array(4,5,6,7)), + make_array(null, 1, null, 2, null, 3, null, null, 4, 5); +---- +[[a, b], NULL] [[a, b], NULL, [c, d]] [NULL, [a, b], NULL] [NULL, [a, b], NULL, NULL, [c, d]] [[a, bc, def], NULL, [rust]] [[1, 2, 3], NULL, [4, 5, 6, 7]] [NULL, 1, NULL, 2, NULL, 3, NULL, NULL, 4, 5] + +query ? +select make_array(column5, null, column5) from arrays_values_without_nulls; +---- +[[2, 3], NULL, [2, 3]] +[[4, 5], NULL, [4, 5]] +[[6, 7], NULL, [6, 7]] +[[8, 9], NULL, [8, 9]] + +query ? +select make_array(['a','b'], null); +---- +[[a, b], NULL] + +## array_sort (aliases: `list_sort`) +query ??? +select array_sort(make_array(1, 3, null, 5, NULL, -5)), array_sort(make_array(1, 3, null, 2), 'ASC'), array_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + +query ??? +select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'LargeList(Int64)')), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'ASC'), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + +query ??? +select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'FixedSizeList(6, Int64)')), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'ASC'), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + +query ? 
+select array_sort(column1, 'DESC', 'NULLS LAST') from arrays_values; +---- +[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] +[20, 18, 17, 16, 15, 14, 13, 12, 11, NULL] +[30, 29, 28, 27, 26, 25, 23, 22, 21, NULL] +[40, 39, 38, 37, 35, 34, 33, 32, 31, NULL] +NULL +[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] +[60, 59, 58, 57, 56, 55, 54, 52, 51, NULL] +[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] + +query ? +select array_sort(column1, 'ASC', 'NULLS FIRST') from arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[NULL, 11, 12, 13, 14, 15, 16, 17, 18, 20] +[NULL, 21, 22, 23, 25, 26, 27, 28, 29, 30] +[NULL, 31, 32, 33, 34, 35, 37, 38, 39, 40] +NULL +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[NULL, 51, 52, 54, 55, 56, 57, 58, 59, 60] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +# test with empty table +query ? +select array_sort(column1, 'DESC', 'NULLS FIRST') from arrays_values where false; +---- + +# test with empty array +query ? +select array_sort([]); +---- +[] + +# empty-but-non-null string arrays should remain non-null, not become null +query ?B +select array_sort(column1), array_sort(column1) is null +from (values (arrow_cast(make_array('b', 'a'), 'List(Utf8)')), (arrow_cast([], 'List(Utf8)'))) as t(column1); +---- +[a, b] false +[] false + +# test with null arguments +query ? +select array_sort(NULL); +---- +NULL + +query ? +select array_sort(column1, NULL) from arrays_values; +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +query ?? +select array_sort(column1, 'DESC', NULL), array_sort(column1, 'ASC', NULL) from arrays_values; +---- +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL + +query ?? 
+select array_sort(column1, NULL, 'NULLS FIRST'), array_sort(column1, NULL, 'NULLS LAST') from arrays_values; +---- +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL + +# maintains inner nullability +query ?T +select array_sort(column1), arrow_typeof(array_sort(column1)) +from values + (arrow_cast([], 'List(non-null Int32)')), + (arrow_cast(NULL, 'List(non-null Int32)')), + (arrow_cast([1, 3, 5, -5], 'List(non-null Int32)')) +; +---- +[] List(non-null Int32) +NULL List(non-null Int32) +[-5, 1, 3, 5] List(non-null Int32) + +query ?T +select column1, arrow_typeof(column1) +from values (array_sort(arrow_cast([1, 3, 5, -5], 'LargeList(non-null Int32)'))); +---- +[-5, 1, 3, 5] LargeList(non-null Int32) + +query ?T +select column1, arrow_typeof(column1) +from values (array_sort(arrow_cast([1, 3, 5, -5], 'FixedSizeList(4 x non-null Int32)'))); +---- +[-5, 1, 3, 5] List(non-null Int32) + +# arrays of strings +query ??? +select array_sort(make_array('banana', 'apple', null, 'cherry')), + array_sort(make_array('banana', 'apple', null, 'cherry'), 'DESC', 'NULLS LAST'), + array_sort(make_array('banana', 'apple', null, 'cherry'), 'ASC', 'NULLS LAST'); +---- +[NULL, apple, banana, cherry] [cherry, banana, apple, NULL] [apple, banana, cherry, NULL] + +query ? 
+select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)]) +---- +[{c0: bar, c1: 1}, {c0: foo, c1: 1}, {c0: foo, c1: 3}] + +## test with argument of incorrect types +query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC +select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1); + +# test with empty row, the row that does not match the condition has row count 0 +statement ok +create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); + +# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. +query ? rowsort +select array_sort([sum(a)]) from t1 where a > 100 group by b; +---- +[102] +[202] + +statement ok +drop table t1; + +# float arrays with NaN and Infinity (NaN sorts after Infinity per IEEE totalOrder) +query ??? +select array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null)), + array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null), 'DESC', 'NULLS LAST'), + array_sort(make_array('NaN'::double, 'NaN'::double, 1.0)); +---- +[NULL, -inf, -1.0, 1.0, inf, NaN] [NaN, inf, 1.0, -1.0, -inf, NULL] [1.0, NaN, NaN] + +# float32 arrays +query ?? +select array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)')), + array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)'), 'DESC', 'NULLS LAST'); +---- +[NULL, 1.0, 2.0, 3.0, NaN] [NaN, 3.0, 2.0, 1.0, NULL] + +# element-level nulls with all sort option combinations +query ???? 
+select array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS FIRST'), + array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS LAST'), + array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS FIRST'), + array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS LAST'); +---- +[NULL, NULL, 1, 2, 3] [1, 2, 3, NULL, NULL] [NULL, NULL, 3, 2, 1] [3, 2, 1, NULL, NULL] + +# timestamp arrays +query ?? +select array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), + arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), + null, + arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)'))), + array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), + arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), + null, + arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)')), 'DESC', 'NULLS LAST'); +---- +[NULL, 2024-01-01T00:00:00, 2024-01-15T10:00:00, 2024-06-15T12:00:00] [2024-06-15T12:00:00, 2024-01-15T10:00:00, 2024-01-01T00:00:00, NULL] + +# date arrays +query ?? +select array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date)), + array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date), 'DESC', 'NULLS LAST'); +---- +[NULL, 2024-01-01, 2024-02-01, 2024-03-01] [2024-03-01, 2024-02-01, 2024-01-01, NULL] + +# struct arrays with nulls and DESC +query ?? +select array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)]), + array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)], 'DESC', 'NULLS LAST'); +---- +[NULL, {c0: a, c1: 1}, {c0: a, c1: 3}, {c0: b, c1: 2}] [{c0: b, c1: 2}, {c0: a, c1: 3}, {c0: a, c1: 1}, NULL] + +# boolean arrays +query ?? +select array_sort(make_array(true, false, null, true, false)), + array_sort(make_array(true, false, null, true, false), 'DESC', 'NULLS LAST'); +---- +[NULL, false, false, true, true] [true, true, false, false, NULL] + +# all-null array +query ? 
+select array_sort(make_array(null, null, null)); +---- +[NULL, NULL, NULL] + +# single-element arrays +query ?? +select array_sort(make_array(42)), array_sort(make_array(null::int)); +---- +[42] [NULL] + +## list_sort (aliases: `array_sort`) +query ??? +select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3, null, 2), 'ASC'), list_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + + +## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) + +# array_append with NULLs + +query ? +select array_append(null, 1); +---- +[1] + +query ? +select array_append(null, [2, 3]); +---- +[[2, 3]] + +query ? +select array_append(null, [[4]]); +---- +[[[4]]] + +query ???? +select + array_append(make_array(), 4), + array_append(make_array(), null), + array_append(make_array(1, null, 3), 4), + array_append(make_array(null, null), 1) +; +---- +[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] + +query ???? +select + array_append(arrow_cast(make_array(), 'LargeList(Int64)'), 4), + array_append(arrow_cast(make_array(), 'LargeList(Int64)'), null), + array_append(arrow_cast(make_array(1, null, 3), 'LargeList(Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'LargeList(Int64)'), 1) +; +---- +[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] + +query ?? +select + array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) +; +---- +[1, NULL, 3, 4] [NULL, NULL, 1] + +# test invalid (non-null) +query error +select array_append(1, 2); + +query error +select array_append(1, [2]); + +query error +select array_append([1], [2]); + +query ?? +select + array_append(make_array(make_array(1, null, 3)), make_array(null)), + array_append(make_array(make_array(1, null, 3)), null); +---- +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] + +query ?? 
+select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(null), 'LargeList(Int64)')), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), null); +---- +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] + +query ?? +select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); +---- +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] + +# array_append scalar function #3 +query ??? +select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_append scalar function #4 (element is list) +query ??? +select array_append(make_array([1], [2], [3]), make_array(4)), array_append(make_array([1.0], [2.0], [3.0]), make_array(4.0)), array_append(make_array(['h'], ['e'], ['l'], ['l']), make_array('o')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? 
+select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(4), 'LargeList(Int64)')), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'LargeList(LargeList(Float64))'), arrow_cast(make_array(4.0), 'LargeList(Float64)')), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'LargeList(LargeList(Utf8))'), arrow_cast(make_array('o'), 'LargeList(Utf8)')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +# list_append scalar function #5 (function alias `array_append`) +query ??? +select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_push_back scalar function #6 (function alias `array_append`) +query ??? +select array_push_back(make_array(1, 2, 3), 4), array_push_back(make_array(1.0, 2.0, 3.0), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? 
+select array_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# list_push_back scalar function #7 (function alias `array_append`) +query ??? +select list_push_back(make_array(1, 2, 3), 4), list_push_back(make_array(1.0, 2.0, 3.0), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_append with columns #1 +query ? +select array_append(column1, column2) from arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +query ? +select array_append(column1, column2) from large_arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +query ? 
+select array_append(column1, column2) from fixed_arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +# array_append with columns #2 (element is list) +query ? +select array_append(column1, column2) from nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + +query ? +select array_append(column1, column2) from large_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + +query ? +select array_append(column1, column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + +# array_append with columns and scalars #1 +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] +[100.1] [,, .] +[16.6, 17.7, 18.8, 100.1] [.] + +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from large_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] 
+[100.1] [,, .] +[16.6, 17.7, 18.8, 100.1] [.] + +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, a, b, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] +[NULL, NULL, NULL, 100.1] [,, a, b, c, d, .] +[16.6, 17.7, 18.8, 100.1] [NULL, NULL, NULL, NULL, NULL, .] + +# array_append with columns and scalars #2 +query ?? +select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + +query ?? +select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))'), column2) from large_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + +query ?? 
+select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + +## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) + +# array_prepend with NULLs + +# DuckDB: [4] +# ClickHouse: Null +query ? +select array_prepend(4, NULL); +---- +[4] + +query ? +select array_prepend(4, []); +---- +[4] + +query ? +select array_prepend(4, [null]); +---- +[4, NULL] + +# DuckDB: [null] +# ClickHouse: [null] +query ? +select array_prepend(null, []); +---- +[NULL] + +query ? +select array_prepend(null, [1]); +---- +[NULL, 1] + +query ? +select array_prepend(null, [[1,2,3]]); +---- +[NULL, [1, 2, 3]] + +# DuckDB: [[]] +# ClickHouse: [[]] +# TODO: We may also return [[]] +query ? +select array_prepend([], []); +---- +[[]] + +query ? +select array_prepend(null, null); +---- +[NULL] + +query ? +select array_append([], null); +---- +[NULL] + + +# array_prepend scalar function #3 +query ??? +select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? 
+select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_prepend scalar function #4 (element is list) +query ??? +select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(LargeList(Int64))')), + array_prepend(arrow_cast(make_array(1.0), 'LargeList(Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(LargeList(Float64))')), + array_prepend(arrow_cast(make_array('h'), 'LargeList(Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(LargeList(Utf8))')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +# list_prepend scalar function #5 (function alias `array_prepend`) +query ??? 
+select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_push_front scalar function #6 (function alias `array_prepend`) +query ??? +select array_push_front(1, make_array(2, 3, 4)), array_push_front(1.0, make_array(2.0, 3.0, 4.0)), array_push_front('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# list_push_front scalar function #7 (function alias `array_prepend`) +query ??? +select list_push_front(1, make_array(2, 3, 4)), list_push_front(1.0, make_array(2.0, 3.0, 4.0)), list_push_front('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_prepend scalar function #7 (element is fixed size list) +query ??? 
+select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), make_array(arrow_cast(make_array(2), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(3), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(4), 'FixedSizeList(1, Int64)'))), + array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), make_array(arrow_cast([2.0], 'FixedSizeList(1, Float64)'), arrow_cast([3.0], 'FixedSizeList(1, Float64)'), arrow_cast([4.0], 'FixedSizeList(1, Float64)'))), + array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), make_array(arrow_cast(['e'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['o'], 'FixedSizeList(1, Utf8)'))); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(FixedSizeList(1, Int64))')), + array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(FixedSizeList(1, Float64))')), + array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(FixedSizeList(1, Utf8))')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? 
+select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, FixedSizeList(1, Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, FixedSizeList(1, Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, FixedSizeList(1, Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +# array_prepend with columns #1 +query ? +select array_prepend(column2, column1) from arrays_values; +---- +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +query ? +select array_prepend(column2, column1) from large_arrays_values; +---- +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +query ? +select array_prepend(column2, column1) from fixed_arrays_values; +---- +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +# array_prepend with columns #2 (element is list) +query ? 
+select array_prepend(column2, column1) from nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + +query ? +select array_prepend(column2, column1) from large_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + +query ? +select array_prepend(column2, column1) from fixed_size_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + +# array_prepend with columns and scalars #1 +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] +[100.1] [., ,] +[100.1, 16.6, 17.7, 18.8] [.] + +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from large_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] +[100.1] [., ,] +[100.1, 16.6, 17.7, 18.8] [.] + +query ?? 
+select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t, a, b] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] +[100.1, NULL, NULL, NULL] [., ,, a, b, c, d] +[100.1, 16.6, 17.7, 18.8] [., NULL, NULL, NULL, NULL, NULL] + +# array_prepend with columns and scalars #2 (element is list) +query ?? +select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + +query ?? +select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))')) from large_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + +query ?? 
+select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + +## array_repeat (aliases: `list_repeat`) + +# array_repeat scalar function #1 +query ???????? +select + array_repeat(1, 5), + array_repeat(3.14, 3), + array_repeat('l', 4), + array_repeat(null, 2), + list_repeat(-1, 5), + list_repeat(-3.14, 0), + list_repeat('rust', 4), + list_repeat(null, 0); +---- +[1, 1, 1, 1, 1] [3.14, 3.14, 3.14] [l, l, l, l] [NULL, NULL] [-1, -1, -1, -1, -1] [] [rust, rust, rust, rust] [] + +# array_repeat scalar function #2 (element as list) +query ???? +select + array_repeat([1], 5), + array_repeat([1.1, 2.2, 3.3], 3), + array_repeat([null, null], 3), + array_repeat([[1, 2], [3, 4]], 2); +---- +[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] + +query ???? +select + array_repeat(arrow_cast([1], 'LargeList(Int64)'), 5), + array_repeat(arrow_cast([1.1, 2.2, 3.3], 'LargeList(Float64)'), 3), + array_repeat(arrow_cast([null, null], 'LargeList(Int64)'), 3), + array_repeat(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2); +---- +[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] + +# array_repeat scalar function with count of different integer types +query ???????? 
+Select + array_repeat(1, arrow_cast(2,'Int8')), + array_repeat(2, arrow_cast(2,'Int16')), + array_repeat(3, arrow_cast(2,'Int32')), + array_repeat(4, arrow_cast(2,'Int64')), + array_repeat(1, arrow_cast(2,'UInt8')), + array_repeat(2, arrow_cast(2,'UInt16')), + array_repeat(3, arrow_cast(2,'UInt32')), + array_repeat(4, arrow_cast(2,'UInt64')); +---- +[1, 1] [2, 2] [3, 3] [4, 4] [1, 1] [2, 2] [3, 3] [4, 4] + +# array_repeat scalar function with count of negative integer types +query ???? +Select + array_repeat(1, arrow_cast(-2,'Int8')), + array_repeat(2, arrow_cast(-2,'Int16')), + array_repeat(3, arrow_cast(-2,'Int32')), + array_repeat(4, arrow_cast(-2,'Int64')); +---- +[] [] [] [] + +# array_repeat with columns #1 + +statement ok +CREATE TABLE array_repeat_table +AS VALUES + (1, 1, 1.1, 'a', make_array(4, 5, 6)), + (2, null, null, null, null), + (3, 2, 2.2, 'rust', make_array(7)), + (0, 3, 3.3, 'datafusion', make_array(8, 9)); + +statement ok +CREATE TABLE large_array_repeat_table +AS SELECT + column1, + column2, + column3, + column4, + arrow_cast(column5, 'LargeList(Int64)') as column5 +FROM array_repeat_table; + +query ?????? +select + array_repeat(column2, column1), + array_repeat(column3, column1), + array_repeat(column4, column1), + array_repeat(column5, column1), + array_repeat(column2, 3), + array_repeat(make_array(1), column1) +from array_repeat_table; +---- +[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] +[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] +[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] +[] [] [] [] [3, 3, 3] [] + +query ?????? 
+select + array_repeat(column2, column1), + array_repeat(column3, column1), + array_repeat(column4, column1), + array_repeat(column5, column1), + array_repeat(column2, 3), + array_repeat(make_array(1), column1) +from large_array_repeat_table; +---- +[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] +[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] +[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] +[] [] [] [] [3, 3, 3] [] + +statement ok +drop table array_repeat_table; + +statement ok +drop table large_array_repeat_table; + +# array_repeat: arrays with NULL counts +statement ok +create table array_repeat_null_count_table +as values +(1, 2), +(2, null), +(3, 1), +(4, -1), +(null, null); + +query I? +select column1, array_repeat(column1, column2) from array_repeat_null_count_table; +---- +1 [1, 1] +2 NULL +3 [3] +4 [] +NULL NULL + +statement ok +drop table array_repeat_null_count_table + +# array_repeat: nested arrays with NULL counts +statement ok +create table array_repeat_nested_null_count_table +as values +([[1, 2], [3, 4]], 2), +([[5, 6], [7, 8]], null), +([[null, null], [9, 10]], 1), +(null, 3), +([[11, 12]], -1); + +query ?? +select column1, array_repeat(column1, column2) from array_repeat_nested_null_count_table; +---- +[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] +[[5, 6], [7, 8]] NULL +[[NULL, NULL], [9, 10]] [[[NULL, NULL], [9, 10]]] +NULL [NULL, NULL, NULL] +[[11, 12]] [] + +statement ok +drop table array_repeat_nested_null_count_table + +# array_repeat edge cases: empty arrays +query ??? +select array_repeat([], 3), array_repeat([], 0), array_repeat([], null); +---- +[[], [], []] [] NULL + +query ?? 
+select array_repeat(null::int, 0), array_repeat(null::int, null); +---- +[] NULL + +# array_repeat LargeList with NULL count +statement ok +create table array_repeat_large_list_null_table +as values +(arrow_cast([1, 2, 3], 'LargeList(Int64)'), 2), +(arrow_cast([4, 5], 'LargeList(Int64)'), null), +(arrow_cast(null, 'LargeList(Int64)'), 3); + +query ?? +select column1, array_repeat(column1, column2) from array_repeat_large_list_null_table; +---- +[1, 2, 3] [[1, 2, 3], [1, 2, 3]] +[4, 5] NULL +NULL [NULL, NULL, NULL] + +statement ok +drop table array_repeat_large_list_null_table + +# array_repeat edge cases: LargeList nested with NULL count +statement ok +create table array_repeat_large_nested_null_table +as values +(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2), +(arrow_cast([[5, 6], [7, 8]], 'LargeList(List(Int64))'), null), +(arrow_cast([[null, null]], 'LargeList(List(Int64))'), 1), +(null, 3); + +query ?? +select column1, array_repeat(column1, column2) from array_repeat_large_nested_null_table; +---- +[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] +[[5, 6], [7, 8]] NULL +[[NULL, NULL]] [[[NULL, NULL]]] +NULL [NULL, NULL, NULL] + +statement ok +drop table array_repeat_large_nested_null_table + +## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`) + +# test with empty array +query ? +select array_concat([]); +---- +[] + +# test with NULL array +query ? +select array_concat(NULL::integer[]); +---- +NULL + +# test with multiple NULL arrays +query ? +select array_concat(NULL::integer[], NULL::integer[]); +---- +NULL + +# test with NULL LargeList +query ? +select array_concat(arrow_cast(NULL::string[], 'LargeList(Utf8)')); +---- +NULL + +# test with NULL FixedSizeList +query ? +select array_concat(arrow_cast(NULL::string[], 'FixedSizeList(2, Utf8)')); +---- +NULL + +# test with mix of NULL and empty arrays +query ? +select array_concat(NULL::integer[], []); +---- +[] + +# test with mix of NULL and non-empty arrays +query ? 
+select array_concat(NULL::integer[], [1, 2, 3]); +---- +[1, 2, 3] + +# Concatenating strings arrays +query ? +select array_concat( + ['1', '2'], + ['3'] +); +---- +[1, 2, 3] + +query ? +select array_concat( + arrow_cast(['1', '2'], 'LargeList(Utf8)'), + arrow_cast(['3'], 'LargeList(Utf8)') +); +---- +[1, 2, 3] + +query ? +select array_concat( + arrow_cast(['1', '2'], 'FixedSizeList(2, Utf8)'), + arrow_cast(['3'], 'FixedSizeList(1, Utf8)') +); +---- +[1, 2, 3] + +# Concatenating string arrays +query ? +select array_concat( + [arrow_cast('1', 'LargeUtf8'), arrow_cast('2', 'LargeUtf8')], + [arrow_cast('3', 'LargeUtf8')] +); +---- +[1, 2, 3] + +# Concatenating stringview +query ? +select array_concat( + [arrow_cast('1', 'Utf8View'), arrow_cast('2', 'Utf8View')], + [arrow_cast('3', 'Utf8View')] +); +---- +[1, 2, 3] + +# Concatenating Mixed types +query ? +select array_concat( + [arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], + [arrow_cast('3', 'LargeUtf8')] +); +---- +[1, 2, 3] + +# Concatenating Mixed types +query ?T +select + array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')]), + arrow_typeof(array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')])); +---- +[1, 2, 3] List(Utf8View) + +# array_concat with NULL elements inside arrays +query ? +select array_concat([1, NULL, 3], [NULL, 5]); +---- +[1, NULL, 3, NULL, 5] + +query ? +select array_concat([NULL, NULL], [1, 2], [NULL]); +---- +[NULL, NULL, 1, 2, NULL] + +query ? +select array_concat([NULL, NULL], [NULL, NULL]); +---- +[NULL, NULL, NULL, NULL] + +# array_concat error +query error DataFusion error: Error during planning: Execution error: Function 'array_concat' user-defined coercion failed with: Error during planning: array_concat does not support type Int64 +select array_concat(1, 2); + +# array_concat scalar function #1 +query ?? 
+select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array_concat scalar function #2 +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# array_concat scalar function #3 +query ? +select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); +---- +[[1], [2], [3], [4], [5], [6], [7], [8], [9]] + +# array_concat scalar function #4 +query ? +select array_concat(make_array([[1]]), make_array([[2]])); +---- +[[[1]], [[2]]] + +# array_concat scalar function #5 +query ? +select array_concat(make_array(2, 3), make_array()); +---- +[2, 3] + +# array_concat scalar function #6 +query ? +select array_concat(make_array(), make_array(2, 3)); +---- +[2, 3] + +# array_concat scalar function #7 (with empty arrays) +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array())); +---- +[[1, 2], [3, 4], []] + +# array_concat scalar function #8 (with empty arrays) +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()), make_array(make_array(), make_array()), make_array(make_array(5, 6), make_array(7, 8))); +---- +[[1, 2], [3, 4], [], [], [], [5, 6], [7, 8]] + +# array_concat scalar function #9 (with empty arrays) +query ? +select array_concat(make_array(make_array()), make_array(make_array(1, 2), make_array(3, 4))); +---- +[[], [1, 2], [3, 4]] + +# array_cat scalar function #10 (function alias `array_concat`) +query ?? +select array_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_cat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# list_concat scalar function #11 (function alias `array_concat`) +query ?? 
+select list_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_concat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# list_cat scalar function #12 (function alias `array_concat`) +query ?? +select list_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_cat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array_concat with different dimensions #1 (2D + 1D) +query ? +select array_concat(make_array([1,2], [3,4]), make_array(5, 6)); +---- +[[1, 2], [3, 4], [5, 6]] + +# array_concat with different dimensions #2 (1D + 2D) +query ? +select array_concat(make_array(5, 6), make_array([1,2], [3,4])); +---- +[[5, 6], [1, 2], [3, 4]] + +# array_concat with different dimensions #3 (2D + 1D + 1D) +query ? +select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)); +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# array_concat with different dimensions #4 (1D + 2D + 3D) +query ? +select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])); +---- +[[[10, 20]], [[30, 40]], [[50, 60]]] + +# array_concat with different dimensions #5 (2D + 1D + 3D) +query ? +select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]])); +---- +[[[30, 40]], [[10, 20]], [[50, 60]]] + +# array_concat with different dimensions #6 (2D + 1D + 3D + 4D + 3D) +query ? +select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]]), make_array([[[70, 80]]]), make_array([[80, 40]])); +---- +[[[[30, 40]]], [[[10, 20]]], [[[50, 60]]], [[[70, 80]]], [[[80, 40]]]] + +# array_concat column-wise #1 +query ? 
+select array_concat(column1, make_array(0)) from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0] +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0] +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0] +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 0] + +# array_concat column-wise #2 +query ? +select array_concat(column1, column1) from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 31, 32, 33, 34, 35, 26, 37, 38, 39, 40] + +# array_concat column-wise #3 +query ? +select array_concat(make_array(column2), make_array(column3)) from arrays_values_without_nulls; +---- +[1, 1] +[12, 2] +[23, 3] +[34, 4] + +# array_concat column-wise #4 +query ? +select array_concat(make_array(column2), make_array(0)) from arrays_values; +---- +[1, 0] +[12, 0] +[23, 0] +[34, 0] +[44, 0] +[NULL, 0] +[55, 0] +[66, 0] + +# array_concat column-wise #5 +query ??? +select array_concat(column1, column1), array_concat(column2, column2), array_concat(column3, column3) from arrays; +---- +[[NULL, 2], [3, NULL], [NULL, 2], [3, NULL]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] [L, o, r, e, m, L, o, r, e, m] +[[3, 4], [5, 6], [3, 4], [5, 6]] [NULL, 5.5, 6.6, NULL, 5.5, 6.6] [i, p, NULL, u, m, i, p, NULL, u, m] +[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r] +[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t] +NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t] +[[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,] +[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL + +# array_concat column-wise #6 +query ?? 
+select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)) from arrays; +---- +[[NULL, 2], [3, NULL], [1, 2], [3, 4]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] +[[3, 4], [5, 6], [1, 2], [3, 4]] [NULL, 5.5, 6.6, 1.1, 2.2, 3.3] +[[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3] +[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3] +[[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3] +[[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3] +[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3] + +# array_concat column-wise #7 +query ? +select array_concat(column3, make_array('.', '.', '.')) from arrays; +---- +[L, o, r, e, m, ., ., .] +[i, p, NULL, u, m, ., ., .] +[d, NULL, l, o, r, ., ., .] +[s, i, t, ., ., .] +[a, m, e, t, ., ., .] +[,, ., ., .] +[., ., .] + +# query ??I? +# select column1, column2, column3, column4 from arrays_values_v2; +# ---- +# [NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] +# NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] +# [9, NULL, 10] NULL 14 [[70, NULL, NULL]] +# [NULL, 1] [NULL, 21] NULL NULL +# [11, 12] NULL NULL NULL +# NULL NULL NULL NULL + + +# array_concat column-wise #8 (1D + 1D) +query ? +select array_concat(column1, column2) from arrays_values_v2; +---- +[NULL, 2, 3, 4, 5, NULL] +[7, NULL, 8] +[9, NULL, 10] +[NULL, 1, NULL, 21] +[11, 12] +NULL + +# array_concat column-wise #9 (2D + 1D) +query ? +select array_concat(column4, make_array(column3)) from arrays_values_v2; +---- +[[30, 40, 50], [12]] +[[NULL, NULL, 60], [13]] +[[70, NULL, NULL], [14]] +[[NULL]] +[[NULL]] +[[NULL]] + +# array_concat column-wise #10 (3D + 2D + 1D) +query ? 
+select array_concat(column4, column1, column2) from nested_arrays; +---- +[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]], [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]], [[7, 8, 9]]] +[[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]], [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]], [[10, 11, 12]]] + +# array_concat column-wise #11 (2D + 1D) +query ? +select array_concat(column4, column1) from arrays_values_v2; +---- +[[30, 40, 50], [NULL, 2, 3]] +[[NULL, NULL, 60], NULL] +[[70, NULL, NULL], [9, NULL, 10]] +[[NULL, 1]] +[[11, 12]] +[NULL] + +# array_concat column-wise #12 (1D + 1D + 1D) +query ? +select array_concat(make_array(column3), column1, column2) from arrays_values_v2; +---- +[12, NULL, 2, 3, 4, 5, NULL] +[13, 7, NULL, 8] +[14, 9, NULL, 10] +[NULL, NULL, 1, NULL, 21] +[NULL, 11, 12] +[NULL] + +## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) + +## array_position with NULL (follow PostgreSQL) +query II +select array_position([1, 2, 3, 4, 5], arrow_cast(NULL, 'Int64')), array_position(arrow_cast(NULL, 'List(Int64)'), 1); +---- +NULL NULL + +# array_position with no match (incl. 
empty array) returns NULL +query II +select array_position([], 1), array_position([2], 1); +---- +NULL NULL + +# array_position scalar function #1 +query III +select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1); +---- +3 5 1 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +3 5 1 + +# array_position scalar function #2 (with optional argument) +query III +select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 5, 4, 5], 5, 4), array_position([1, 1, 1], 1, 2); +---- +4 5 2 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1, 2); +---- +4 5 2 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1, 2); +---- +4 5 2 + +# array_position scalar function #3 (element is list) +query II +select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); +---- +2 2 + +# array_position scalar function #4 (element in list; with optional argument) +query II +select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 
[4, 5, 6], 3), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 3); +---- +4 3 + +query II +select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), array_position(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +2 2 + +query I +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5) +---- +1 + +query I +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2) +---- +5 + +query I +SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) +---- +3 + +query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from +SELECT array_position([1, 2, 3], 'foo') + +query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from +SELECT array_position([1, 2, 3], 'foo', 2) + +# list_position scalar function #5 (function alias `array_position`) +query III +select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); +---- +3 5 1 + +query III +select list_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +# array_indexof scalar function #6 (function alias `array_position`) +query III +select array_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), array_indexof([1, 2, 3, 4, 5], 5), array_indexof([1, 1, 1], 1); +---- +3 5 1 + +query III +select array_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +# 
list_indexof scalar function #7 (function alias `array_position`) +query III +select list_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), list_indexof([1, 2, 3, 4, 5], 5), list_indexof([1, 1, 1], 1); +---- +3 5 1 + +query III +select list_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +# array_position with columns #1 +query II +select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; +---- +1 1 +2 2 +3 3 +4 4 + +query II +select array_position(column1, column2), array_position(column1, column2, column3) from large_arrays_values_without_nulls; +---- +1 1 +2 2 +3 3 +4 4 + +# array_position with columns #2 (element is list) +query II +select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; +---- +3 3 +2 5 + +query II +select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; +---- +3 3 +2 5 + +# array_position with columns and scalars #1 +query III +select array_position(make_array(1, 2, 3, 4, 5), column2), array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; +---- +1 3 NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL + +query III +select array_position(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_position(column1, 3), array_position(column1, 3, 5) from large_arrays_values_without_nulls; +---- +1 3 NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL + +# array_position with columns and scalars #2 (element is list) +query III +select array_position(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), column2), array_position(column1, make_array(4, 5, 6)), array_position(column1, make_array(1, 2, 3), 2) from nested_arrays; +---- +NULL 6 4 +NULL 1 NULL + +query III +select 
array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), 'LargeList(LargeList(Int64))'), column2), array_position(column1, arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)')), array_position(column1, arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2) from large_nested_arrays; +---- +NULL 6 4 +NULL 1 NULL + +# array_position with NULL element in haystack array (NULL = NULL semantics) +query III +select array_position([1, NULL, 3], arrow_cast(NULL, 'Int64')), array_position([NULL, 2, 3], arrow_cast(NULL, 'Int64')), array_position([1, 2, NULL], arrow_cast(NULL, 'Int64')); +---- +2 1 3 + +query I +select array_position(arrow_cast([1, NULL, 3], 'LargeList(Int64)'), arrow_cast(NULL, 'Int64')); +---- +2 + +# array_position with NULL element in array and start_from +query II +select array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 2), array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 1); +---- +3 1 + +# array_position with column array and scalar element +query IIII +select array_position(column1, 3), array_position(column1, 10), array_position(column1, 20), array_position(column1, 999) from arrays_values_without_nulls; +---- +3 10 NULL NULL +NULL NULL 10 NULL +NULL NULL NULL NULL +NULL NULL NULL NULL + +query II +select array_position(column1, 3), array_position(column1, 20) from large_arrays_values_without_nulls; +---- +3 NULL +NULL 10 +NULL NULL +NULL NULL + +query II +select array_position(column1, 3), array_position(column1, 20) from fixed_size_arrays_values_without_nulls; +---- +3 NULL +NULL 10 +NULL NULL +NULL NULL + +# array_position with column array, scalar element, and scalar start_from +query II +select array_position(column1, 3, 1), array_position(column1, 3, 4) from arrays_values_without_nulls; +---- +3 NULL +NULL NULL +NULL NULL +NULL NULL + +query II +select array_position(column1, 3, 1), array_position(column1, 3, 4) from large_arrays_values_without_nulls; +---- +3 NULL +NULL NULL +NULL NULL +NULL NULL + +# 
array_position with column array, scalar element, and column start_from +query I +select array_position(column1, 3, column3) from arrays_values_without_nulls; +---- +3 +NULL +NULL +NULL + +# array_position with scalar haystack, scalar element, and column start_from +query I +select array_position([1, 2, 1, 2], 2, column3) from arrays_values_without_nulls; +---- +2 +2 +4 +4 + +# array_position start_from boundary cases +query IIII +select array_position([1, 2, 3], 3, 3), array_position([1, 2, 3], 1, 2), array_position([1, 2, 3], 1, 1), array_position([1, 2, 3], 3, 4); +---- +3 NULL 1 NULL + +query II +select array_position([1, 2, 3], 3, 4), array_position([1], 1, 2); +---- +NULL NULL + +# array_position with empty array in various contexts +query II +select array_position(arrow_cast(make_array(), 'List(Int64)'), 1), array_position(arrow_cast(make_array(), 'LargeList(Int64)'), 1); +---- +NULL NULL + +# FixedSizeList with start_from +query II +select array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 1, 2), array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 2, 4); +---- +4 5 + +query I +select array_position(arrow_cast(['a', 'b', 'c', 'b'], 'FixedSizeList(4, Utf8)'), 'b', 3); +---- +4 + +## array_positions (aliases: `list_positions`) + +# array_positions with empty array +query ? +select array_positions(arrow_cast(make_array(), 'List(Int64)'), 1); +---- +[] + +query ? +select array_positions([1, 2, 3, 4, 5], null); +---- +[] + +#TODO: https://github.com/apache/datafusion/issues/7142 +# array_positions with NULL (follow PostgreSQL) +#query ? +#select array_positions(null, 1); +#---- +#NULL + +# array_positions scalar function #1 +query ??? +select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? 
+select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? +select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +# array_positions scalar function #2 (element is list) +query ? +select array_positions(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), [2, 1, 3]); +---- +[2, 4] + +query ? +select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'LargeList(List(Int64))'), [2, 1, 3]); +---- +[2, 4] + +query ? +select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'FixedSizeList(5, List(Int64))'), [2, 1, 3]); +---- +[2, 4] + +# list_positions scalar function #3 (function alias `array_positions`) +query ??? +select list_positions(['h', 'e', 'l', 'l', 'o'], 'l'), list_positions([1, 2, 3, 4, 5], 5), list_positions([1, 1, 1], 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? +select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? +select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), + list_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), + list_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +# array_positions with columns #1 +query ? +select array_positions(column1, column2) from arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +query ? 
+select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +query ? +select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from fixed_size_arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +# array_positions with columns #2 (element is list) +query ? +select array_positions(column1, column2) from nested_arrays; +---- +[3] +[2, 5] + +query ? +select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from nested_arrays; +---- +[3] +[2, 5] + +query ? +select array_positions(column1, column2) from fixed_size_nested_arrays; +---- +[3] +[2, 5] + +# array_positions with columns and scalars #1 +query ?? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + +query ?? +select array_positions(arrow_cast(column1, 'LargeList(Int64)'), 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + +query ?? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from fixed_size_arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + +# array_positions with columns and scalars #2 (element is list) +query ?? +select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from nested_arrays; +---- +[6] [] +[1] [] + +query ?? +select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(4, 5, 6)), array_positions(arrow_cast(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), 'LargeList(List(Int64))'), column2) from nested_arrays; +---- +[6] [] +[1] [] + +query ?? 
+select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from fixed_size_nested_arrays; +---- +[6] [] +[1] [] + +## array_replace (aliases: `list_replace`) + +# array_replace scalar function #1 +query ??? +select + array_replace(make_array(1, 2, 3, 4), 2, 3), + array_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + array_replace(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + array_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), + array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), + array_replace(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +# array_replace scalar function #2 (element is list) +query ?? +select + array_replace( + make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), + [4, 5, 6], + [1, 1, 1] + ), + array_replace( + make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select + array_replace( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select + array_replace( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +# list_replace scalar function #3 (function alias `array_replace`) +query ??? +select list_replace( + make_array(1, 2, 3, 4), 2, 3), + list_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + list_replace(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select list_replace( + arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + list_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + list_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +# array_replace scalar function #4 (null input) +query ? +select array_replace(make_array(1, 2, 3, 4, 5), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +query ? +select array_replace(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +# array_replace scalar function with columns #1 +query ? 
+select array_replace(column1, column2, column3) from arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ? +select array_replace(column1, column2, column3) from large_arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_replace scalar function with columns #2 (element is list) +query ? +select array_replace(column1, column2, column3) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ? +select array_replace(column1, column2, column3) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +# array_replace scalar function with columns and scalars #1 +query ??? 
+select + array_replace(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), + array_replace(column1, 1, column3), + array_replace(column1, column2, 4) +from arrays_with_repeating_elements; +---- +[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ??? +select + array_replace(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), + array_replace(column1, 1, column3), + array_replace(column1, column2, 4) +from large_arrays_with_repeating_elements; +---- +[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_replace scalar function with columns and scalars #2 (element is list) +query ??? 
+select + array_replace( + make_array( + [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), + column2, + column3 + ), + array_replace(column1, make_array(1, 2, 3), column3), + array_replace(column1, column2, make_array(11, 12, 13)) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ??? +select + array_replace( + arrow_cast(make_array( + [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]),'LargeList(List(Int64))'), + column2, + column3 + ), + array_replace(column1, make_array(1, 2, 3), column3), + array_replace(column1, column2, make_array(11, 12, 13)) +from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 
24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_replace_n (aliases: `list_replace_n`) + +# array_replace_n scalar function #1 +query ??? +select + array_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), + array_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), + array_replace_n(make_array(1, 2, 3), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), + array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), + array_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3, 2), + array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0, 2), + array_replace_n(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +# array_replace_n scalar function #2 (element is list) +query ?? +select + array_replace_n( + make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), + [4, 5, 6], + [1, 1, 1], + 2 + ), + array_replace_n( + make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), + [2, 3, 4], + [3, 1, 4], + 2 + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select + array_replace_n( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), + [4, 5, 6], + [1, 1, 1], + 2 + ), + array_replace_n( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), + [2, 3, 4], + [3, 1, 4], + 2 + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select + array_replace_n( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [4, 5, 6], + [1, 1, 1], + 2 + ), + array_replace_n( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [2, 3, 4], + [3, 1, 4], + 2 + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +# list_replace_n scalar function #3 (function alias `array_replace_n`) +query ??? +select + list_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), + list_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), + list_replace_n(make_array(1, 2, 3), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + list_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), + list_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), + list_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +# array_replace_n scalar function #4 (null input) +query ? +select array_replace_n(make_array(1, 2, 3, 4, 5), NULL, NULL, NULL); +---- +[1, 2, 3, 4, 5] + +query ? +select array_replace_n(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL, NULL); +---- +[1, 2, 3, 4, 5] + +# array_replace_n scalar function with columns #1 +query ? 
+select + array_replace_n(column1, column2, column3, column4) +from arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +query ? +select + array_replace_n(column1, column2, column3, column4) +from large_arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +# array_replace_n scalar function with columns #2 (element is list) +query ? +select + array_replace_n(column1, column2, column3, column4) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + +query ? 
+select + array_replace_n(column1, column2, column3, column4) +from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + + +# array_replace_n scalar function with columns and scalars #1 +query ???? +select + array_replace_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3, column4), + array_replace_n(column1, 1, column3, column4), + array_replace_n(column1, column2, 4, column4), + array_replace_n(column1, column2, column3, 2) +from arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] + +query ???? 
+select + array_replace_n(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3, column4), + array_replace_n(column1, 1, column3, column4), + array_replace_n(column1, column2, 4, column4), + array_replace_n(column1, column2, column3, 2) +from large_arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] + +# array_replace_n scalar function with columns and scalars #2 (element is list) +query ???? +select + array_replace_n( + make_array( + [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), + column2, + column3, + column4 + ), + array_replace_n(column1, make_array(1, 2, 3), column3, column4), + array_replace_n(column1, column2, make_array(11, 12, 13), column4), + array_replace_n(column1, column2, column3, 2) +from nested_arrays_with_repeating_elements; +---- +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 
15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ???? 
+select + array_replace_n( + arrow_cast(make_array( + [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), 'LargeList(List(Int64))'), + column2, + column3, + column4 + ), + array_replace_n(column1, make_array(1, 2, 3), column3, column4), + array_replace_n(column1, column2, make_array(11, 12, 13), column4), + array_replace_n(column1, column2, column3, 2) +from large_nested_arrays_with_repeating_elements; +---- +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_replace_all (aliases: `list_replace_all`) + +# array_replace_all scalar function #1 +query ??? +select + array_replace_all(make_array(1, 2, 3, 4), 2, 3), + array_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + array_replace_all(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + array_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), + array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), + array_replace_all(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +# array_replace_all scalar function #2 (element is list) +query ?? +select + array_replace_all( + make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), + [4, 5, 6], + [1, 1, 1] + ), + array_replace_all( + make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select + array_replace_all( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace_all( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select + array_replace_all( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace_all( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +# list_replace_all scalar function #3 (function alias `array_replace_all`) +query ??? +select + list_replace_all(make_array(1, 2, 3, 4), 2, 3), + list_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + list_replace_all(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +query ??? +select + list_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + list_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + list_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +# array_replace_all scalar function #4 (null input) +query ? +select array_replace_all(make_array(1, 2, 3, 4, 5), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +query ? +select array_replace_all(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +# array_replace_all scalar function with columns #1 +query ? 
+select + array_replace_all(column1, column2, column3) +from arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] +[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +query ? +select + array_replace_all(column1, column2, column3) +from large_arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] +[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +# array_replace_all scalar function with columns #2 (element is list) +query ? +select + array_replace_all(column1, column2, column3) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + +query ? +select + array_replace_all(column1, column2, column3) +from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + +# array_replace_all scalar function with columns and scalars #1 +query ??? 
+select + array_replace_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), + array_replace_all(column1, 1, column3), + array_replace_all(column1, column2, 4) +from arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] + +query ??? +select + array_replace_all(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), + array_replace_all(column1, 1, column3), + array_replace_all(column1, column2, 4) +from large_arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] + +# array_replace_all scalar function with columns and scalars #2 (element is list) +query ??? 
+select + array_replace_all( + make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), + column2, + column3 + ), + array_replace_all(column1, make_array(1, 2, 3), column3), + array_replace_all(column1, column2, make_array(11, 12, 13)) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] 
[[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] + +query ??? +select + array_replace_all( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), 'LargeList(List(Int64))'), + column2, + column3 + ), + array_replace_all(column1, make_array(1, 2, 3), column3), + array_replace_all(column1, column2, make_array(11, 12, 13)) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 
20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] + +# array_replace_n with null handling + +statement ok +create table t as values + (make_array(3, 1, NULL, 3), 3, 4, 2), + (make_array(3, 1, NULL, 3), NULL, 5, 2), + (NULL, 3, 2, 1), + (make_array(3, 1, 3), 3, NULL, 1) +; + + +# ([3, 1, NULL, 3], 3, 4, 2) => [4, 1, NULL, 4] NULL not matched +# ([3, 1, NULL, 3], NULL, 5, 2) => [3, 1, 5, 3] NULL is replaced with 5 +# (NULL, 3, 2, 1) => NULL +# ([3, 1, 3], 3, NULL, 1) => [NULL, 1, 3] + +query ?III? +select column1, column2, column3, column4, array_replace_n(column1, column2, column3, column4) from t; +---- +[3, 1, NULL, 3] 3 4 2 [4, 1, NULL, 4] +[3, 1, NULL, 3] NULL 5 2 [3, 1, 5, 3] +NULL 3 2 1 NULL +[3, 1, 3] 3 NULL 1 [NULL, 1, 3] + + + +statement ok +drop table t; + + + +## array_to_string (aliases: `list_to_string`, `array_join`, `list_join`) + +# array_to_string scalar function #1 +query TTT +select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function #2 +query TTT +select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_repeat(array_repeat(array_repeat(3, 2), 2), 3), '/\'); +---- +11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 + +# array_to_string scalar function #3 +query T +select array_to_string(make_array(), ',') +---- +(empty) + +# array to string dictionary +statement ok +CREATE TABLE table1 AS VALUES + (1, 'foo'), + (3, 'bar'), + (1, 'foo'), + (2, NULL), + (NULL, 'baz') + ; + +# expect 1-3-1-2 (dictionary values should be repeated) +query T +SELECT
array_to_string(array_agg(column1),'-') +FROM ( + SELECT arrow_cast(column1, 'Dictionary(Int32, Int32)') as column1 + FROM table1 +); +---- +1-3-1-2 + +# expect foo,bar,foo,baz (dictionary values should be repeated) +query T +SELECT array_to_string(array_agg(column2),',') +FROM ( + SELECT arrow_cast(column2, 'Dictionary(Int64, Utf8)') as column2 + FROM table1 +); +---- +foo,bar,foo,baz + +# Expect only values that are in the group +query I?T +SELECT column1, array_agg(column2), array_to_string(array_agg(column2),',') +FROM ( + SELECT column1, arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2 + FROM table1 +) +GROUP BY column1 +ORDER BY column1; +---- +1 [foo, foo] foo,foo +2 [NULL] (empty) +3 [bar] bar +NULL [baz] baz + +# verify make_array does force to Utf8View +query T +SELECT arrow_typeof(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd')); +---- +List(Utf8View) + +# expect a,b,c,d. make_array forces all types to be of a common type (see above) +query T +SELECT array_to_string(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd'), ','); +---- +a,b,c,d + +# array_to_string using largeutf8 for second arg +query TTT +select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'LargeUtf8')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'LargeUtf8')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'LargeUtf8')); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string using utf8view for second arg +query TTT +select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'Utf8View')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'Utf8View')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'Utf8View')); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +statement ok +drop table table1; + + +## array_union (aliases: `list_union`) + +# array_union scalar function #1 +query ? +select array_union([1, 2, 3, 4], [5, 6, 3, 4]); +---- +[1, 2, 3, 4, 5, 6] + +query ? 
+select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); +---- +[1, 2, 3, 4, 5, 6] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); +---- +[1, 2, 3, 4, 5, 6] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6], 'FixedSizeList(2, Int64)')); +---- +[1, 2, 3, 4, 5, 6] + +# array_union scalar function #2 +query ? +select array_union([1, 2, 3, 4], [5, 6, 7, 8]); +---- +[1, 2, 3, 4, 5, 6, 7, 8] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 7, 8], 'LargeList(Int64)')); +---- +[1, 2, 3, 4, 5, 6, 7, 8] + +# array_union scalar function #3 +query ? +select array_union([1,2,3], []); +---- +[1, 2, 3] + +query ? +select array_union(arrow_cast([1,2,3], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); +---- +[1, 2, 3] + +# array_union scalar function #4 +query ? +select array_union([1, 2, 3, 4], [5, 4]); +---- +[1, 2, 3, 4, 5] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 4], 'LargeList(Int64)')); +---- +[1, 2, 3, 4, 5] + +# array_union scalar function #5 +statement ok +CREATE TABLE arrays_with_repeating_elements_for_union +AS VALUES + ([0, 1, 1], []), + ([1, 1], [2]), + ([2, 3], [3]), + ([3], [3, 4]) +; + +query ? +select array_union(column1, column2) from arrays_with_repeating_elements_for_union; +---- +[0, 1] +[1, 2] +[2, 3] +[3, 4] + +query ? +select array_union(arrow_cast(column1, 'LargeList(Int64)'), arrow_cast(column2, 'LargeList(Int64)')) from arrays_with_repeating_elements_for_union; +---- +[0, 1] +[1, 2] +[2, 3] +[3, 4] + +statement ok +drop table arrays_with_repeating_elements_for_union; + +# array_union scalar function #6 +query ? +select array_union([], []); +---- +[] + +query ? 
+select array_union(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); +---- +[] + +# array_union scalar function #7 +# re-enable when https://github.com/apache/arrow-rs/issues/9227 is fixed +# query ? +# select array_union([[null]], []); +# ---- +# [[]] + +query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_union' function: +select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([], 'LargeList(Int64)')); + +# array_union scalar function #8 +query ? +select array_union([null], [null]); +---- +[NULL] + +query ? +select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([[null]], 'LargeList(List(Int64))')); +---- +[[NULL]] + +# array_union scalar function #9 +query ? +select array_union(null, []); +---- +NULL + +query ? +select array_union(null, arrow_cast([], 'LargeList(Int64)')); +---- +NULL + +# array_union scalar function #10 +query ? +select array_union(null, null); +---- +NULL + +# array_union scalar function #11 +query ? +select array_union([1, 1, 2, 2, 3, 3], null); +---- +NULL + +query ? +select array_union(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); +---- +NULL + +# array_union scalar function #12 +query ? +select array_union(null, [1, 1, 2, 2, 3, 3]); +---- +NULL + +query ? +select array_union(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); +---- +NULL + +# array_union scalar function #13 +query ? +select array_union([1.2, 3.0], [1.2, 3.0, 5.7]); +---- +[1.2, 3.0, 5.7] + +query ? +select array_union(arrow_cast([1.2, 3.0], 'LargeList(Float64)'), arrow_cast([1.2, 3.0, 5.7], 'LargeList(Float64)')); +---- +[1.2, 3.0, 5.7] + +# array_union scalar function #14 +query ? +select array_union(['hello'], ['hello','datafusion']); +---- +[hello, datafusion] + +query ? +select array_union(arrow_cast(['hello'], 'LargeList(Utf8)'), arrow_cast(['hello','datafusion'], 'LargeList(Utf8)')); +---- +[hello, datafusion] + +query ? 
+select array_union(column1, column2) +from array_intersect_table_1D_NULL; +---- +[1, 2, 3, 4] +[2, 3] +[3, 4] +NULL +NULL +NULL + +query ? +select array_union(arrow_cast(null, 'List(Int64)'), [1, 2]); +---- +NULL + +query ? +select array_union([1, 2], arrow_cast(null, 'List(Int64)')); +---- +NULL + +query ? +select array_intersect(arrow_cast(null, 'List(Int64)'), [1, 2]); +---- +NULL + +query ? +select array_intersect([1, 2], arrow_cast(null, 'List(Int64)')); +---- +NULL + +query ? +select array_except(arrow_cast(null, 'List(Int64)'), [1, 2]); +---- +NULL + +query ? +select array_except([1, 2], arrow_cast(null, 'List(Int64)')); +---- +NULL + +# list_to_string scalar function #4 (function alias `array_to_string`) +query TTT +select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3, 4, 5], '-'), list_to_string([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_join scalar function #5 (function alias `array_to_string`) +query TTT +select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# list_join scalar function #6 (function alias `array_to_string`) +query TTT +select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','),
list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function with nulls #1 +query TTT +select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); +---- +h,l,o 1-3-5 2|3 + +query TTT +select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function with nulls #2 +query TTT +select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); +---- +h,-,-,-,o nil-2-nil-4-5 1|0|3 + +query TTT +select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0'); +---- +h,-,-,-,o nil-2-nil-4-5 1|0|3 + +query TTT +select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0'); +---- +h,-,-,-,o nil-2-nil-4-5 1|0|3 + +# 
array_to_string float formatting: special values and longer decimals +query TTT +select + array_to_string(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), '|'), + array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'LargeList(Float64)'), '|'), + array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'FixedSizeList(5, Float64)'), '|'); +---- +NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 + +# array_to_string float formatting: scientific-notation inputs +query T +select array_to_string( + make_array( + CAST('1E20' AS DOUBLE), + CAST('-1e+20' AS DOUBLE), + CAST('6.02214076e23' AS DOUBLE), + CAST('1.2345e6' AS DOUBLE), + CAST('1e-5' AS DOUBLE), + CAST('-1e-5' AS DOUBLE), + CAST('9.1093837015e-31' AS DOUBLE), + CAST('-2.5e-4' AS DOUBLE) + ), + '|' +); +---- +100000000000000000000|-100000000000000000000|602214076000000000000000|1234500|0.00001|-0.00001|0.00000000000000000000000000000091093837015|-0.00025 + +query T +select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); +---- +-,a,- + +# array_to_string with columns #1 + +# For reference +# select column1, column4 from arrays_values; +# ---- +# [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] , +# [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] . 
+# [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] - +# [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] ok +# NULL @ +# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ +# [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] ^ +# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL + +query T +select array_to_string(column1, column4) from arrays_values; +---- +2,3,4,5,6,7,8,9,10 +11.12.13.14.15.16.17.18.20 +21-22-23-25-26-27-28-29-30 +31ok32ok33ok34ok35ok37ok38ok39ok40 +NULL +41$42$43$44$45$46$47$48$49$50 +51^52^54^55^56^57^58^59^60 +NULL + +query T +select array_to_string(column1, column4) from large_arrays_values; +---- +2,3,4,5,6,7,8,9,10 +11.12.13.14.15.16.17.18.20 +21-22-23-25-26-27-28-29-30 +31ok32ok33ok34ok35ok37ok38ok39ok40 +NULL +41$42$43$44$45$46$47$48$49$50 +51^52^54^55^56^57^58^59^60 +NULL + +query TT +select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values; +---- +2_3_4_5_6_7_8_9_10 1/2/3 +11_12_13_14_15_16_17_18_20 1/2/3 +21_22_23_25_26_27_28_29_30 1/2/3 +31_32_33_34_35_37_38_39_40 1/2/3 +NULL 1/2/3 +41_42_43_44_45_46_47_48_49_50 1/2/3 +51_52_54_55_56_57_58_59_60 1/2/3 +61_62_63_64_65_66_67_68_69_70 1/2/3 + +query TT +select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values; +---- +2_3_4_5_6_7_8_9_10 1/2/3 +11_12_13_14_15_16_17_18_20 1/2/3 +21_22_23_25_26_27_28_29_30 1/2/3 +31_32_33_34_35_37_38_39_40 1/2/3 +NULL 1/2/3 +41_42_43_44_45_46_47_48_49_50 1/2/3 +51_52_54_55_56_57_58_59_60 1/2/3 +61_62_63_64_65_66_67_68_69_70 1/2/3 + +query TT +select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values; +---- +*_2_3_4_5_6_7_8_9_10 1.2.3 +11_12_13_14_15_16_17_18_*_20 1.2.3 +21_22_23_*_25_26_27_28_29_30 1.2.3 +31_32_33_34_35_*_37_38_39_40 1.2.3 +NULL 1.2.3 +41_42_43_44_45_46_47_48_49_50 1.2.3 +51_52_*_54_55_56_57_58_59_60 1.2.3 +61_62_63_64_65_66_67_68_69_70 1.2.3 + +query TT +select array_to_string(column1, '_', '*'), 
array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values; +---- +*_2_3_4_5_6_7_8_9_10 1.2.3 +11_12_13_14_15_16_17_18_*_20 1.2.3 +21_22_23_*_25_26_27_28_29_30 1.2.3 +31_32_33_34_35_*_37_38_39_40 1.2.3 +NULL 1.2.3 +41_42_43_44_45_46_47_48_49_50 1.2.3 +51_52_*_54_55_56_57_58_59_60 1.2.3 +61_62_63_64_65_66_67_68_69_70 1.2.3 + +# array_to_string with per-row null_string column +statement ok +CREATE TABLE test_null_str_col AS VALUES + (make_array(1, NULL, 3), ',', 'N/A'), + (make_array(NULL, 5, NULL), ',', 'MISSING'), + (make_array(10, NULL, 12), '-', 'X'), + (make_array(20, NULL, 21), '-', NULL); + +query T +SELECT array_to_string(column1, column2, column3) FROM test_null_str_col; +---- +1,N/A,3 +MISSING,5,MISSING +10-X-12 +20-21 + +statement ok +DROP TABLE test_null_str_col; + +# array_to_string with decimal values +query T +select array_to_string(arrow_cast(make_array(1.5, NULL, 3.14), 'List(Decimal128(10, 2))'), ',', 'N'); +---- +1.50,N,3.14 + +# array_to_string with date values +query T +select array_to_string(arrow_cast(make_array('2024-01-15', '2024-06-30', '2024-12-25'), 'List(Date32)'), ','); +---- +2024-01-15,2024-06-30,2024-12-25 + +query T +select array_to_string(arrow_cast(make_array('2024-01-15', NULL, '2024-12-25'), 'List(Date32)'), ',', 'N'); +---- +2024-01-15,N,2024-12-25 + +# array_to_string with timestamp values +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Second, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Second, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Millisecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Millisecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Microsecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Microsecond, 
None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Nanosecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Nanosecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +# array_to_string with time values +query T +select array_to_string(make_array(arrow_cast('10:30:00', 'Time32(Second)'), arrow_cast('15:45:00', 'Time32(Second)')), ','); +---- +10:30:00,15:45:00 + +query T +select array_to_string(make_array(arrow_cast('10:30:00', 'Time64(Microsecond)'), arrow_cast('15:45:00', 'Time64(Microsecond)')), ','); +---- +10:30:00,15:45:00 + +# array_to_string with interval values +query T +select array_to_string(make_array(interval '1 year 2 months', interval '3 days 4 hours'), ','); +---- +14 mons,3 days 4 hours + +# array_to_string with duration values +query T +select array_to_string(make_array(arrow_cast(1000, 'Duration(Millisecond)'), arrow_cast(2000, 'Duration(Millisecond)')), ','); +---- +PT1S,PT2S + + +## cardinality + +# cardinality scalar function +query III +select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o')); +---- +5 3 5 + +query III +select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), cardinality(arrow_cast([1, 3, 5], 'LargeList(Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +5 3 5 + +query III +select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +5 3 5 + +# cardinality scalar function #2 +query II +select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); +---- +6 18 + +query I +select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 
'LargeList(List(Int64))')); +---- +6 + +query I +select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); +---- +6 + +# cardinality scalar function #3 +query II +select cardinality(make_array()), cardinality(make_array(make_array())) +---- +0 0 + +query II +select cardinality([]), cardinality([]::int[]) as with_cast +---- +0 0 + +query II +select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +0 0 + +#TODO +#https://github.com/apache/datafusion/issues/9158 +#query II +#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Int64))')) +#---- +#NULL 0 + +# cardinality of NULL arrays should return NULL +query II +select cardinality(NULL), cardinality(arrow_cast(NULL, 'LargeList(Int64)')) +---- +NULL NULL + +# cardinality with columns +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 3 +NULL 3 4 +4 NULL 1 +4 3 NULL + +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from large_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 3 +NULL 3 4 +4 NULL 1 +4 3 NULL + +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 5 +NULL 3 5 +4 NULL 5 +4 3 NULL + +## array_remove (aliases: `list_remove`) + +# array_remove scalar function #1 +query ??? +select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ??? 
+select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ??? +select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ??? +select + array_remove(make_array(1, null, 2, 3), 2), + array_remove(make_array(1.1, null, 2.2, 3.3), 1.1), + array_remove(make_array('a', null, 'bc'), 'a'); +---- +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] + +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); +---- +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] + +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'FixedSizeList(4, Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'FixedSizeList(3, Utf8)'), 'a'); +---- +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] + +#TODO: https://github.com/apache/datafusion/issues/7142 +# follow PostgreSQL behavior +#query ? +#select +# array_remove(NULL, 1) +#---- +#NULL + +query ?? +select + array_remove(make_array(1, null, 2), null), + array_remove(make_array(1, null, 2, null), null); +---- +NULL NULL + +query ?? 
+select + array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); +---- +NULL NULL + +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); +---- +NULL NULL + +# array_remove with null element from column +query ? +select array_remove(column1, column2) from (values + (make_array(1, 2, 3), 2), + (make_array(4, 5, 6), null), + (make_array(7, 8, 9), 8), + (null, 1) +) as t(column1, column2); +---- +[1, 3] +NULL +[7, 9] +NULL + +# array_remove with null element from column (LargeList) +query ? +select array_remove(column1, column2) from (values + (arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), + (arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)'), null), + (arrow_cast(make_array(7, 8, 9), 'LargeList(Int64)'), 8) +) as t(column1, column2); +---- +[1, 3] +NULL +[7, 9] + +# array_remove scalar function #2 (element is list) +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +# list_remove scalar function #3 (function alias `array_remove`) +query ??? +select list_remove(make_array(1, 2, 2, 1, 1), 2), list_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ?? +select list_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + list_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +# array_remove scalar function with columns #1 +query ? +select array_remove(column1, column2) from arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ? +select array_remove(column1, column2) from large_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ? 
+select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_remove scalar function with columns #2 (element is list) +query ? +select array_remove(column1, column2) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ? +select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ? 
+select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +# array_remove scalar function with columns and scalars #1 +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ?? 
+select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_remove scalar function with columns and scalars #2 (element is list) +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_remove_n (aliases: `list_remove_n`) + +# array_remove_n with null element scalar +query ?? +select array_remove_n(make_array(1, 2, 2, 1, 1), NULL, 2), + array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2); +---- +NULL [1, 1, 1] + +# array_remove_n with null element scalar (LargeList) +query ?? +select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), NULL, 2), + array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2); +---- +NULL [1, 1, 1] + +# array_remove_n with null element from column +query ? 
+select array_remove_n(column1, column2, column3) from (values + (make_array(1, 2, 2, 1, 1), 2, 2), + (make_array(3, 4, 4, 3, 3), null, 2), + (make_array(5, 6, 6, 5, 5), 6, 1), + (null, 1, 1) +) as t(column1, column2, column3); +---- +[1, 1, 1] +NULL +[5, 6, 5, 5] +NULL + +# array_remove_n with null element from column (LargeList) +query ? +select array_remove_n(column1, column2, column3) from (values + (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2), + (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null, 2), + (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6, 1) +) as t(column1, column2, column3); +---- +[1, 1, 1] +NULL +[5, 6, 5, 5] + +# array_remove_n scalar function #1 +query ??? +select array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), array_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), array_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +query ??? +select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int32)'), 2, 2), + array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float32)'), 1.0, 2), + array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +query ??? +select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int32)'), 2, 2), + array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float32)'), 1.0, 2), + array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +# array_remove_n scalar function #2 (element is list) +query ?? 
+select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6], 2), array_remove_n(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 2); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6], 2), + array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4], 2); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6], 2), + array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4], 2); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +# list_remove_n scalar function #3 (function alias `array_remove_n`) +query ??? +select list_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), list_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), list_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +# array_remove_n scalar function with columns #1 +query ? +select array_remove_n(column1, column2, column4) from arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 2, 3] +[5, 5, 6, 5, 5, 5, 4, 4] +[8, 9, 8, 7, 7] +[11, 12, 11, 12, 11, 12] + +# array_remove_n scalar function with columns #2 (element is list) +query ? 
+select array_remove_n(column1, column2, column4) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + +# array_remove_n scalar function with columns and scalars #1 +query ??? +select array_remove_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column4), array_remove_n(column1, 1, column4), array_remove_n(column1, column2, 2) from arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] [1, 1, 3, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [11, 12, 11, 12, 10, 11, 12, 10] + +# array_remove_n scalar function with columns and scalars #2 (element is list) +query ??? 
+select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2, column4), array_remove_n(column1, make_array(1, 2, 3), column4), array_remove_n(column1, column2, 2) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_remove_all (aliases: `list_remove_all`) + +#TODO: https://github.com/apache/datafusion/issues/7142 +# array_remove_all with NULL elements +#query ?
+#select array_remove_all(NULL, 1); +#---- +#NULL + +query ? +select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); +---- +NULL + +# array_remove_all with null element from column +query ? +select array_remove_all(column1, column2) from (values + (make_array(1, 2, 2, 1, 1), 2), + (make_array(3, 4, 4, 3, 3), null), + (make_array(5, 6, 6, 5, 5), 6), + (null, 1) +) as t(column1, column2); +---- +[1, 1, 1] +NULL +[5, 5, 5] +NULL + +# array_remove_all with null element from column (LargeList) +query ? +select array_remove_all(column1, column2) from (values + (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null), + (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6) +) as t(column1, column2); +---- +[1, 1, 1] +NULL +[5, 5, 5] + +# array_remove_all scalar function #1 +query ??? +select array_remove_all(make_array(1, 2, 2, 1, 1), 2), array_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +query ??? +select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +query ??? +select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +# array_remove_all scalar function #2 (element is list) +query ?? 
+select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove_all(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), + array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +# list_remove_all scalar function #3 (function alias `array_remove_all`) +query ??? +select list_remove_all(make_array(1, 2, 2, 1, 1), 2), list_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +query ?? +select list_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + list_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +# array_remove_all scalar function with columns #1 +query ? +select array_remove_all(column1, column2) from arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 3] +[5, 5, 6, 5, 5, 5] +[8, 9, 8] +[11, 12, 11, 12, 11, 12] + +query ? 
+select array_remove_all(column1, column2) from fixed_arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 3] +[5, 5, 6, 5, 5, 5] +[8, 9, 8] +[11, 12, 11, 12, 11, 12] + +# array_remove_all scalar function with columns #2 (element is list) +query ? +select array_remove_all(column1, column2) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + +query ? +select array_remove_all(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + +# array_remove_all scalar function with columns and scalars #1 +query ?? +select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] +[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ?? 
+select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from fixed_arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] +[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_remove_all scalar function with columns and scalars #2 (element is list) +query ?? +select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove_all(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove_all(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## trim_array (deprecated) + +## array_length (aliases: `list_length`) + +# array_length scalar function #1 +query III +select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); +---- +5 3 3 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); +---- +5 3 3 + +# array_length scalar function #2 +query III +select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 
2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); +---- +5 3 3 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 1); +---- +5 3 3 + +# array_length scalar function #3 +query III +select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); +---- +NULL NULL 2 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 2); +---- +NULL NULL 2 + +# array_length scalar function #4 +query II +select array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 1), array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 2); +---- +3 2 + +query II +select array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 1), array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 2); +---- +3 2 + +# array_length scalar function #5 +query III +select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2) +---- +0 0 NULL + +# array_length scalar function #6 nested array +query III +select array_length([[1, 2, 3, 4], [5, 6, 7, 8]]), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 1), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 2); +---- +2 2 4 + +# list_length scalar function #7 (function alias `array_length`) +query IIII +select list_length(make_array(1, 2, 3, 4, 5)), list_length(make_array(1, 2, 3)), list_length(make_array([1, 2], [3, 4], [5, 6])), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 3); +---- +5 3 3 NULL + +query III +select 
list_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), list_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); +---- +5 3 3 + +# array_length with columns +query I +select array_length(column1, column3) from arrays_values; +---- +10 +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +query I +select array_length(arrow_cast(column1, 'LargeList(Int64)'), column3) from arrays_values; +---- +10 +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# array_length with columns and scalars +query II +select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values; +---- +2 10 +2 10 +NULL 10 +NULL 10 +NULL NULL +NULL 10 +NULL 10 +NULL 10 + +query II +select array_length(arrow_cast(array[array[1, 2], array[3, 4]], 'LargeList(List(Int64))'), column3), array_length(arrow_cast(column1, 'LargeList(Int64)'), 1) from arrays_values; +---- +2 10 +2 10 +NULL 10 +NULL 10 +NULL NULL +NULL 10 +NULL 10 +NULL 10 + +# array_length for fixed sized list + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))')); +---- +5 3 3 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), 1); +---- +5 3 3 + + +query RRR +select array_distance([2], [3]), list_distance([1], [2]), list_distance([1], [-2]); +---- +1 1 3 + +query error +select list_distance([1], [1, 2]); + +query R +select array_distance([[1, 1]], [1, 2]); +---- +1 + +query R +select array_distance([[1, 1]], [[1, 2]]); +---- +1 + +query R +select array_distance([[1, 1]], [[1, 
2]]); +---- +1 + +query RR +select array_distance([1, 1, 0, 0], [2, 2, 1, 1]), list_distance([1, 2, 3], [1, 2, 3]); +---- +2 0 + +query RR +select array_distance([1.0, 1, 0, 0], [2, 2.0, 1, 1]), list_distance([1, 2.0, 3], [1, 2, 3]); +---- +2 0 + +query R +select list_distance([1, 1, NULL, 0], [2, 2, NULL, NULL]); +---- +NULL + +query R +select list_distance([NULL, NULL], [NULL, NULL]); +---- +NULL + +query R +select list_distance([1.0, 2.0, 3.0], [1.0, 2.0, 3.5]) AS distance; +---- +0.5 + +query R +select list_distance([1, 2, 3], [1, 2, 3]) AS distance; +---- +0 + +# array_distance with columns +query RRR +select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from arrays_distance_table; +---- +0 0.374165738677 NULL +5.196152422707 6.063827174318 NULL +10.392304845413 11.778794505381 NULL +15.58845726812 15.935494971917 NULL + +query RRR +select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from large_arrays_distance_table; +---- +0 0.374165738677 NULL +5.196152422707 6.063827174318 NULL +10.392304845413 11.778794505381 NULL +15.58845726812 15.935494971917 NULL + +query RRR +select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from fixed_size_arrays_distance_table; +---- +0 0.374165738677 NULL +5.196152422707 6.063827174318 NULL +10.392304845413 11.778794505381 NULL +15.58845726812 15.935494971917 NULL + + +## array_dims (aliases: `list_dims`) + +# array dims error +query error +select array_dims(1); + +# array_dims scalar function +query ??? +select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]])); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? 
+select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? +select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +# array_dims scalar function #2 +query ?? +select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); +---- +[1, 2, 3] [2, 5, 4] + +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +# array_dims scalar function #3 +query ?? +select array_dims(make_array()), array_dims(make_array(make_array())) +---- +NULL [1, 0] + +query ?? +select array_dims(arrow_cast(make_array(), 'LargeList(Int64)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +NULL [1, 0] + +# list_dims scalar function #4 (function alias `array_dims`) +query ??? +select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]])); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? 
+select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? +select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +# array_dims with columns +query ??? +select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [3] +NULL [3] [4] +[2, 2] NULL [1] +[2, 2] [3] NULL + +query ??? +select array_dims(column1), array_dims(column2), array_dims(column3) from large_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [3] +NULL [3] [4] +[2, 2] NULL [1] +[2, 2] [3] NULL + +query ??? 
+select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +NULL [3] [5] +[2, 2] NULL [5] +[2, 2] [3] NULL + + +## array_ndims (aliases: `list_ndims`) + +# array_ndims scalar function #1 + +#follow PostgreSQL +query I +select + array_ndims(null); +---- +NULL + +query I +select + array_ndims([2, 3]); +---- +1 + +statement ok +CREATE TABLE array_ndims_table +AS VALUES + ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), + ([2], [4, 5], [[8]], [[[[[10]]]]]), + (NUll, [6, 7], [[9]], [[[[[10]]]]]), + ([3], [6], [[9]], [[[[[10]]]]]) +; + +statement ok +CREATE TABLE large_array_ndims_table +AS SELECT + column1, + arrow_cast(column2, 'LargeList(Int64)') as column2, + arrow_cast(column3, 'LargeList(List(Int64))') as column3, + arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 +FROM array_ndims_table; + +statement ok +CREATE TABLE fixed_array_ndims_table +AS VALUES + (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) +; + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from array_ndims_table; 
+---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from large_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from fixed_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + + + +statement ok +drop table array_ndims_table; + +statement ok +drop table large_array_ndims_table + +query I +select array_ndims(arrow_cast([null], 'List(List(List(Int64)))')); +---- +3 + +# array_ndims scalar function #2 +query II +select array_ndims(array_repeat(array_repeat(array_repeat(1, 3), 2), 1)), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); +---- +3 21 + +# array_ndims scalar function #3 +query II +select array_ndims(make_array()), array_ndims(make_array(make_array())) +---- +1 2 + +query II +select array_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), array_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +1 2 + +# list_ndims scalar function #4 (function alias `array_ndims`) +query III +select list_ndims(make_array(1, 2, 3)), list_ndims(make_array([1, 2], [3, 4])), list_ndims(make_array([[[[1], [2]]]])); +---- +1 2 5 + +query III +select list_ndims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_ndims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); +---- +1 2 5 + +query II +select list_ndims(make_array()), list_ndims(make_array(make_array())) +---- +1 2 + +query II +select list_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +1 2 + +# array_ndims with columns +query III +select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; +---- +2 1 1 +2 1 1 
+2 1 1 +2 1 1 +NULL 1 1 +2 NULL 1 +2 1 NULL + +query III +select array_ndims(column1), array_ndims(column2), array_ndims(column3) from large_arrays; +---- +2 1 1 +2 1 1 +2 1 1 +2 1 1 +NULL 1 1 +2 NULL 1 +2 1 NULL + +## array_has/array_has_all/array_has_any + +# If lhs is empty, return false +query B +select array_has([], 1); +---- +false + +# If rhs is NULL, we return NULL +query BBB +select array_has([], null), + array_has([1, 2, 3], null), + array_has([null, 1], null); +---- +NULL NULL NULL + +# Always return false if not contained even if list has null elements +query BB +select array_has([1, null, 2], 3), + array_has([null, null, null], 3); +---- +false false + +#TODO: array_has_all and array_has_any cannot handle NULL +#query BBBB +#select array_has_any([], null), +# array_has_any([1, 2, 3], null), +# array_has_all([], null), +# array_has_all([1, 2, 3], null); +#---- +#false false false false + +query BBBBBBBBBBBB +select array_has(make_array(1,2), 1), + array_has(make_array(1,2,NULL), 1), + array_has(make_array([2,3], [3,4]), make_array(2,3)), + array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])), + array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])), + array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])), + array_has(make_array([[[1]]]), make_array([[1]])), + array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])), + array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])), + list_has(make_array(1,2,3), 4), + array_contains(make_array(1,2,3), 3), + list_contains(make_array(1,2,3), 0) +; +---- +true true true true true false true false true false true false + +query BBBBBBBBBBBB +select array_has(arrow_cast(make_array(1,2), 'LargeList(Int64)'), 1), + array_has(arrow_cast(make_array(1,2,NULL), 'LargeList(Int64)'), 1), + array_has(arrow_cast(make_array([2,3], [3,4]), 'LargeList(List(Int64))'), make_array(2,3)), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 
'LargeList(List(List(Int64)))'), make_array([1], [2,3])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([4,5], [6])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([1])), + array_has(arrow_cast(make_array([[[1]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[2]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1], [2]])), + list_has(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 4), + array_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 3), + list_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 0) +; +---- +true true true true true false true false true false true false + +query BBBBBBBBBBBB +select array_has(arrow_cast(make_array(1,2), 'FixedSizeList(2, Int64)'), 1), + array_has(arrow_cast(make_array(1,2,NULL), 'FixedSizeList(3, Int64)'), 1), + array_has(arrow_cast(make_array([2,3], [3,4]), 'FixedSizeList(2, List(Int64))'), make_array(2,3)), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1], [2,3])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([4,5], [6])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1])), + array_has(arrow_cast(make_array([[[1]]]), 'FixedSizeList(1, List(List(List(Int64))))'), make_array([[1]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[2]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[1], [2]])), + list_has(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, 
Int64)'), 4), + array_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 3), + list_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 0) +; +---- +true true true true true false true false true false true false + +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D; +---- +true true true +false false false + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Int64)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Int64)'), arrow_cast(column4, 'LargeList(Int64)')), + array_has_any(arrow_cast(column5, 'LargeList(Int64)'), arrow_cast(column6, 'LargeList(Int64)')) +from array_has_table_1D; +---- +true true true +false false false + +query B +select array_has(column1, column2) +from array_has_table_null; +---- +true +true +false +false +false + +# array_has([1, 3, 5], 1) -> true (array contains element) +# array_has([], 1) -> false (empty array, not null) +# array_has(null, 1) -> null (null array) +query BB +select array_has(column1, column2), array_has(null, column2) +from array_has_table_empty; +---- +true NULL +false NULL +NULL NULL + +# Test for issue: array_has should return false for empty arrays, not null +# This test demonstrates the correct behavior with COALESCE to show the distinction +# array_has([1, 3, 5], 1) -> 'true' +# array_has([], 1) -> 'false' (empty array should return false) +# array_has(null, 1) -> 'null' (null array should return null) +query ?T +SELECT column1, COALESCE(CAST(array_has(column1, column2) AS VARCHAR), 'null') +from array_has_table_empty; +---- +[1, 3, 5] true +[] false +NULL null + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D; +---- +true +false + +query BB +select array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D; +---- +true true +false false + +query BBB +select array_has(column1, column2), + 
array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D_Float; +---- +true true false +false false true + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Float64)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Float64)'), arrow_cast(column4, 'LargeList(Float64)')), + array_has_any(arrow_cast(column5, 'LargeList(Float64)'), arrow_cast(column6, 'LargeList(Float64)')) +from array_has_table_1D_Float; +---- +true true false +false false true + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_Float; +---- +true +false + +query BB +select array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D_Float; +---- +true true +false true + +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D_Boolean; +---- +false true true +true true true + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Boolean)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), arrow_cast(column4, 'LargeList(Boolean)')), + array_has_any(arrow_cast(column5, 'LargeList(Boolean)'), arrow_cast(column6, 'LargeList(Boolean)')) +from array_has_table_1D_Boolean; +---- +false true true +true true true + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_Boolean; +---- +false +true + +query BB +select array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D_Boolean; +---- +true true +true true + +query BBBBBBBB +select array_has_all(column3, arrow_cast(column4,'LargeList(Boolean)')), + array_has_any(column5, arrow_cast(column6,'LargeList(Boolean)')), + array_has_all(column3, arrow_cast(column4,'List(Boolean)')), + array_has_any(column5, arrow_cast(column6,'List(Boolean)')), + array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), column4), + array_has_any(arrow_cast(column5, 
'LargeList(Boolean)'), column6), + array_has_all(arrow_cast(column3, 'List(Boolean)'), column4), + array_has_any(arrow_cast(column5, 'List(Boolean)'), column6) +from fixed_size_array_has_table_1D_Boolean; +---- +true true true true true true true true +true true true true true true true true + +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D_UTF8; +---- +true true false +false false true + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Utf8)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Utf8)'), arrow_cast(column4, 'LargeList(Utf8)')), + array_has_any(arrow_cast(column5, 'LargeList(Utf8)'), arrow_cast(column6, 'LargeList(Utf8)')) +from array_has_table_1D_UTF8; +---- +true true false +false false true + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_UTF8; +---- +true +false + +query BB +select array_has(column1, column2), + array_has_all(column3, column4) +from array_has_table_2D; +---- +false true +true false + +query BB +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2), + array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) +from array_has_table_2D; +---- +false true +true false + +query B +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2) +from fixed_size_array_has_table_2D; +---- +false +false + +query B +select array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) +from fixed_size_array_has_table_2D; +---- +true +false + +query B +select array_has_all(column1, column2) +from array_has_table_2D_float; +---- +true +false + +query B +select array_has_all(arrow_cast(column1, 'LargeList(List(Float64))'), arrow_cast(column2, 'LargeList(List(Float64))')) +from array_has_table_2D_float; +---- +true +false + +query B +select array_has_all(column1, column2) +from 
fixed_size_array_has_table_2D_float; +---- +false +false + +query B +select array_has(column1, column2) from array_has_table_3D; +---- +false +true +false +false +true +false +true + +query B +select array_has(arrow_cast(column1, 'LargeList(List(List(Int64)))'), column2) from array_has_table_3D; +---- +false +true +false +false +true +false +true + +query B +select array_has(column1, column2) from fixed_size_array_has_table_3D; +---- +false +false +false +false +true +true +true + +query BBBB +select array_has(column1, make_array(5, 6)), + array_has(column1, make_array(7, NULL)), + array_has(column2, 5.5), + array_has(column3, 'o') +from arrays; +---- +false false false true +true false true false +true false false true +false true false false +NULL NULL false false +false false NULL false +false false false NULL + +query BBBB +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(5, 6)), + array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(7, NULL)), + array_has(arrow_cast(column2, 'LargeList(Float64)'), 5.5), + array_has(arrow_cast(column3, 'LargeList(Utf8)'), 'o') +from arrays; +---- +false false false true +true false true false +true false false true +false true false false +NULL NULL false false +false false NULL false +false false false NULL + +# Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m'] +# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m'] +# Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r'] +# Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b'] +# Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x'] +# Row 6: [[11,12],[13,14]], NULL, [',','a','b','c','d'] +# Row 7: [[15,16],[NULL,18]], [16.6,17.7,18.8], NULL +query BBBB +select array_has(column1, make_array(5, 6)), + array_has(column1, make_array(7, NULL)), + array_has(column2, 5.5), + array_has(column3, 'o') +from fixed_size_arrays; +---- +false false false true +true false true false +true false false true 
+false true false false +NULL NULL false false +false false NULL false +false false false NULL + +query BBBB +select array_has_all(make_array(1,2,3), []), + array_has_any(make_array(1,2,3), []), + array_has_all(make_array('aa','bb','cc'), []), + array_has_any(make_array('aa','bb','cc'), []) +; +---- +true false true false + +query BBBBBBBBBBBBB +select array_has_all(make_array(1,2,3), make_array(1,3)), + array_has_all(make_array(1,2,3), make_array(1,4)), + array_has_all(make_array([1,2], [3,4]), make_array([1,2])), + array_has_all(make_array([1,2], [3,4]), make_array([1,3])), + array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])), + array_has_all(make_array([[1,2,3]]), make_array([[1]])), + array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])), + array_has_any(make_array(1,2,3), make_array(1,10,100)), + array_has_any(make_array(1,2,3), make_array(10,100)), + array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])), + array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])), + array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])), + array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]])) +; +---- +true false true false false false true true false false true false true + +query BBBBBBBBBBBBB +select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(1,3), 'LargeList(Int64)')), + array_has_all(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,4), 'LargeList(Int64)')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2]), 'LargeList(List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,3]), 'LargeList(List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'LargeList(List(Int64))')), + 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1]]), 'LargeList(List(List(Int64)))')), + array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))')), + array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,10,100), 'LargeList(Int64)')), + array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(10,100),'LargeList(Int64)')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'LargeList(List(Int64))')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'LargeList(List(Int64))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'LargeList(List(List(Int64)))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'LargeList(List(List(Int64)))')) +; +---- +true false true false false false true true false false true false true + +query BBBBBBBBBBBBB +select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), + array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), + 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 'FixedSizeList(3, Int64)')), + array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) +; +---- +true false true false false false true true false false true false true + +# rewrite various array_has operations to InList where the haystack is a literal list +# NB that `col in (a, b, c)` is simplified to OR if there are <= 3 elements, so we make 4-element haystack lists + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); +---- 
+logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan 
+01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: 
partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); +---- +1 + +query TT +explain with test AS (SELECT 
substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has([needle], needle); +---- +100000 + +# The optimizer does not currently eliminate the filter; +# Instead, it's rewritten as `IS NOT NULL OR NULL` due to SQL null semantics +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has([needle], needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: 
+06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IS NOT NULL OR NULL, projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +# any operator +query ? +select column3 from arrays where 'L'=any(column3); +---- +[L, o, r, e, m] + +query I +select count(*) from arrays where 'L'=any(column3); +---- +1 + +query I +select count(*) from arrays where 'X'=any(column3); +---- +0 + +# any operator with comparison operators +# Use inline arrays so the test data is visible and the needle (5) +# falls within the range of some arrays but not others. +statement ok +CREATE TABLE any_op_test AS VALUES + (1, make_array(1, 2, 3)), + (2, make_array(4, 5, 6)), + (3, make_array(7, 8, 9)), + (4, make_array(3, 5, 7)); + +# 5 > ANY(arr): true when array_min < 5 +# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓ +query I? +select column1, column2 from any_op_test where 5 > any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +4 [3, 5, 7] + +# 5 >= ANY(arr): true when array_min <= 5 +# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5 ✓ +query I? +select column1, column2 from any_op_test where 5 >= any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +4 [3, 5, 7] + +# 5 < ANY(arr): true when array_max > 5 +# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓ +query I? 
+select column1, column2 from any_op_test where 5 < any(column2) order by column1; +---- +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# 5 <= ANY(arr): true when array_max >= 5 +# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5 ✓ +query I? +select column1, column2 from any_op_test where 5 <= any(column2) order by column1; +---- +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5 +# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5 ✓, row4: [3,5,7] min=3!=5 ✓ +query I? +select column1, column2 from any_op_test where 5 <> any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# For a single-element array where the element equals the needle, <> should return false +query B +select 5 <> any(make_array(5)); +---- +false + +# For a uniform array [5,5,5], <> should also return false +query B +select 5 <> any(make_array(5, 5, 5)); +---- +false + +# Empty array: all operators should return false (no elements satisfy the condition) +query B +select 5 = any(make_array()); +---- +false + +query B +select 5 <> any(make_array()); +---- +false + +query B +select 5 > any(make_array()); +---- +false + +query B +select 5 < any(make_array()); +---- +false + +query B +select 5 >= any(make_array()); +---- +false + +query B +select 5 <= any(make_array()); +---- +false + +# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition +# These return false (NULLs are skipped by array_min/array_max) +query B +select 5 > any(make_array(6, NULL)); +---- +false + +query B +select 5 < any(make_array(3, NULL)); +---- +false + +query B +select 5 >= any(make_array(6, NULL)); +---- +false + +query B +select 5 <= any(make_array(3, NULL)); +---- +false + +# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition +query B +select 5 > any(make_array(3, NULL)); +---- +true + +query B +select 5 < any(make_array(6, NULL)); +---- 
+true + +query B +select 5 >= any(make_array(5, NULL)); +---- +true + +query B +select 5 <= any(make_array(5, NULL)); +---- +true + +query B +select 5 <> any(make_array(3, NULL)); +---- +true + +query B +select 5 <> any(make_array(5, NULL)); +---- +false + +# All-NULL array: all operators should return false +query B +select 5 > any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 < any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 >= any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 <= any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 <> any(make_array(NULL::INT, NULL::INT)); +---- +false + +# NULL left operand: should return NULL for non-empty arrays +query B +select NULL > any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL < any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL >= any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL <= any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL <> any(make_array(1, 2, 3)); +---- +NULL + +# NULL left operand with empty array: should return false +query B +select NULL > any(make_array()); +---- +false + +# NULL array: should return NULL +query B +select 5 > any(NULL::INT[]); +---- +NULL + +query B +select 5 < any(NULL::INT[]); +---- +NULL + +query B +select 5 >= any(NULL::INT[]); +---- +NULL + +query B +select 5 <= any(NULL::INT[]); +---- +NULL + +query B +select 5 <> any(NULL::INT[]); +---- +NULL + +statement ok +DROP TABLE any_op_test; + +## array_distinct + +#TODO: https://github.com/apache/datafusion/issues/7142 +#query ? +#select array_distinct(null); +#---- +#NULL + +# test with empty row, the row that does not match the condition has row count 0 +statement ok +create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); + +# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. +query ? 
rowsort +select array_distinct([sum(a)]) from t1 where a > 100 group by b; +---- +[102] +[202] + +statement ok +drop table t1; + +query ? +select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a); +---- +[1, 2, 3] +NULL +[1, 3] + +query ? +select array_distinct(arrow_cast(null, 'LargeList(Int64)')); +---- +NULL + +query ? +select array_distinct([]); +---- +[] + +query ? +select array_distinct([[], []]); +---- +[[]] + +query ? +select array_distinct(column1) +from array_distinct_table_1D; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[[5, 6], NULL] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_large; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_fixed; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8_fixed; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D_fixed; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[[5, 6], NULL] + +## arrays_zip (aliases: `list_zip`) + +# Spark example: arrays_zip(array(1, 2, 3), array(2, 3, 4)) +query ? +select arrays_zip([1, 2, 3], [2, 3, 4]); +---- +[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}] + +# Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4)) +query ? +select arrays_zip([1, 2], [2, 3], [3, 4]); +---- +[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}] + +# basic: two integer arrays of equal length +query ? +select arrays_zip([1, 2, 3], [10, 20, 30]); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# basic: two arrays with different element types (int + string) +query ? 
+select arrays_zip([1, 2, 3], ['a', 'b', 'c']); +---- +[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] + +# three arrays of equal length +query ? +select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]); +---- +[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}] + +# four arrays of equal length +query ? +select arrays_zip([1], [2], [3], [4]); +---- +[{1: 1, 2: 2, 3: 3, 4: 4}] + +# mixed element types: float + boolean +query ? +select arrays_zip([1.5, 2.5], [true, false]); +---- +[{1: 1.5, 2: true}, {1: 2.5, 2: false}] + +# different length arrays: shorter array padded with NULLs +query ? +select arrays_zip([1, 2], [3, 4, 5]); +---- +[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] + +# different length arrays: first longer +query ? +select arrays_zip([1, 2, 3], [10]); +---- +[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}] + +# different length: one single element, other three elements +query ? +select arrays_zip([1], ['a', 'b', 'c']); +---- +[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}] + +# empty arrays +query ? +select arrays_zip([], []); +---- +[] + +# one empty, one non-empty +query ? +select arrays_zip([], [1, 2, 3]); +---- +[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] + +# NULL elements inside arrays +query ? +select arrays_zip([1, NULL, 3], ['a', 'b', 'c']); +---- +[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}] + +# all NULL elements +query ? +select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]); +---- +[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}] + +# both args are NULL (entire list null) +query ? +select arrays_zip(NULL::int[], NULL::int[]); +---- +NULL + +# one arg is NULL list, other is real array +query ? +select arrays_zip(NULL::int[], [1, 2, 3]); +---- +[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] + +# real array + NULL list +query ? +select arrays_zip([1, 2], NULL::text[]); +---- +[{1: 1, 2: NULL}, {1: 2, 2: NULL}] + +# column-level test with multiple rows +query ? 
+select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), ([6], [60, 70])) as t(a, b); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}] +[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}] +[{1: 6, 2: 60}, {1: NULL, 2: 70}] + +# column-level test with NULL rows +query ? +select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}] +[{1: NULL, 2: 30}, {1: NULL, 2: 40}] +[{1: 5, 2: NULL}, {1: 6, 2: NULL}] + +# column-level test with single argument +query ? +select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 1}, {1: 2}] +NULL +[{1: 5}, {1: 6}] + +query ? +select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 10}, {1: 20}] +[{1: 30}, {1: 40}] +NULL + +# No input +query error Error during planning: 'arrays_zip' does not support zero arguments +select arrays_zip(); + +# Non-array input +query error DataFusion error: Execution error: arrays_zip expects array arguments, got Int64 +select arrays_zip(1, 2); + +# null input +query ? +select arrays_zip(null) +---- +NULL + +# single empty array +query ? +select arrays_zip([]) +---- +[] + + +# single array of null +query ? +select arrays_zip([null]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int[]]) +---- +[{1: NULL}] + +# alias: list_zip +query ? +select list_zip([1, 2], [3, 4]); +---- +[{1: 1, 2: 3}, {1: 2, 2: 4}] + +# column test: total values equal (3 each) but per-row lengths differ +# a: [1] b: [10, 20] → row 0: a has 1, b has 2 +# a: [2, 3] b: [30] → row 1: a has 2, b has 1 +# total a values = 3, total b values = 3 (same!) but rows are misaligned +query ? +select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, b); +---- +[{1: 1, 2: 10}, {1: NULL, 2: 20}] +[{1: 2, 2: 30}, {1: 3, 2: NULL}] + +# single element arrays +query ? 
+select arrays_zip([42], ['hello']); +---- +[{1: 42, 2: hello}] + +# single argument +query ? +select arrays_zip([1, 2, 3]); +---- +[{1: 1}, {1: 2}, {1: 3}] + +# arrays_zip with LargeList inputs +query ? +select arrays_zip( + arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), + arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# arrays_zip with LargeList different lengths (padding) +query ? +select arrays_zip( + arrow_cast(make_array(1, 2), 'LargeList(Int64)'), + arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] + +# single argument from LargeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + +# arrays_zip with FixedSizeList inputs +query ? +select arrays_zip( + arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), + arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# single argument from FixedSizeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + +# arrays_zip mixing List and LargeList +query ? +select arrays_zip( + [1, 2, 3], + arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# arrays_zip mixing List and FixedSizeList with different lengths (padding) +query ? +select arrays_zip( + [1, 2, 3], + arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}] + +# arrays_zip with LargeList and FixedSizeList mixed types +query ? +select arrays_zip( + arrow_cast(make_array(1, 2), 'LargeList(Int64)'), + arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)') +); +---- +[{1: 1, 2: a}, {1: 2, 2: b}] + +query ??? 
+select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D; +---- +[1] [1, 3] [1, 3] +[11] [11, 33] [11, 33] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from large_array_intersect_table_1D; +---- +[1] [1, 3] [1, 3] +[11] [11, 33] [11, 33] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D_Float; +---- +[1.0] [1.0, 3.0] [] +[] [2.0] [1.11] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D_Boolean; +---- +[] [true, false] [false] +[false] [true] [true] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from large_array_intersect_table_1D_Boolean; +---- +[] [true, false] [false] +[false] [true] [true] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D_UTF8; +---- +[bc] [rust, arrow] [] +[] [datafusion, rust, arrow] [rust, arrow] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from large_array_intersect_table_1D_UTF8; +---- +[bc] [rust, arrow] [] +[] [datafusion, rust, arrow] [rust, arrow] + +query ? +select array_intersect(column1, column2) +from array_intersect_table_1D_NULL; +---- +[2, 3] +[3] +[3] +NULL +NULL +NULL + +query ?? +select array_intersect(column1, column2), + array_intersect(column3, column4) +from array_intersect_table_2D; +---- +[] [[4, 5], [6, 7]] +[[3, 4]] [[5, 6, 7], [8, 9, 10]] + +query ?? 
+select array_intersect(column1, column2), + array_intersect(column3, column4) +from large_array_intersect_table_2D; +---- +[] [[4, 5], [6, 7]] +[[3, 4]] [[5, 6, 7], [8, 9, 10]] + + +query ? +select array_intersect(column1, column2) +from array_intersect_table_2D_float; +---- +[[1.1, 2.2], [3.3]] +[[1.1, 2.2], [3.3]] + +query ? +select array_intersect(column1, column2) +from large_array_intersect_table_2D_float; +---- +[[1.1, 2.2], [3.3]] +[[1.1, 2.2], [3.3]] + +query ? +select array_intersect(column1, column2) +from array_intersect_table_3D; +---- +[] +[[[1, 2]]] + +query ? +select array_intersect(column1, column2) +from large_array_intersect_table_3D; +---- +[] +[[[1, 2]]] + +query ?????? +SELECT array_intersect(make_array(1,2,3), make_array(2,3,4)), + array_intersect(make_array(1,3,5), make_array(2,4,6)), + array_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), + array_intersect(make_array(true, false), make_array(true)), + array_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), + array_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ?????? 
+SELECT array_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), + array_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), + array_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), + array_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), + array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), + array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ?????? +SELECT array_intersect(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,3,4), 'FixedSizeList(3, Int64)')), + array_intersect(arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)')), + array_intersect(arrow_cast(make_array('aa','bb','cc'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'FixedSizeList(3, Utf8)')), + array_intersect(arrow_cast(make_array(true, false), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true), 'FixedSizeList(1, Boolean)')), + array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'FixedSizeList(3, Float64)')), + array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'FixedSizeList(3, List(Int64))')) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ? +select array_intersect([], []); +---- +[] + +query ? 
+select array_intersect(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); +---- +[] + +query ? +select array_intersect([1, 1, 2, 2, 3, 3], null); +---- +NULL + +query ? +select array_intersect(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); +---- +NULL + +query ? +select array_intersect(null, [1, 1, 2, 2, 3, 3]); +---- +NULL + +query ? +select array_intersect(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); +---- +NULL + +query ? +select array_intersect([], null); +---- +NULL + +query ? +select array_intersect([[1,2,3]], [[]]); +---- +[] + +query ? +select array_intersect([[null]], [[]]); +---- +[] + +query ? +select array_intersect(arrow_cast([], 'LargeList(Int64)'), null); +---- +NULL + +query ? +select array_intersect(null, []); +---- +NULL + +query ? +select array_intersect(null, arrow_cast([], 'LargeList(Int64)')); +---- +NULL + +query ? +select array_intersect(null, null); +---- +NULL + +query ?????? +SELECT list_intersect(make_array(1,2,3), make_array(2,3,4)), + list_intersect(make_array(1,3,5), make_array(2,4,6)), + list_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), + list_intersect(make_array(true, false), make_array(true)), + list_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), + list_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ?????? 
+SELECT list_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), + list_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), + list_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), + list_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), + list_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), + list_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query BBBB +select list_has_all(make_array(1,2,3), make_array(4,5,6)), + list_has_all(make_array(1,2,3), make_array(1,2)), + list_has_any(make_array(1,2,3), make_array(4,5,6)), + list_has_any(make_array(1,2,3), make_array(1,2,4)) +; +---- +false true false true + +query BBBB +select arrays_overlap(make_array(1,2,3), make_array(4,5,6)), + arrays_overlap(make_array(1,2,3), make_array(1,2,4)), + arrays_overlap(make_array(['aa']), make_array(['aa'],['bb'])), + arrays_overlap(make_array('aa',NULL), make_array('bb',NULL)) +; +---- +false true true true + +query ??? +select range(column2), + range(column1, column2), + range(column1, column2, column3) +from arrays_range; +---- +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [3, 4, 5, 6, 7, 8, 9] [3, 5, 7, 9] +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 7, 10] + +query ??????????? 
+select range(5), + range(2, 5), + range(2, 10, 3), + range(10, 2, -3), + range(1, 5, -1), + range(1, -5, 1), + range(1, -5, -1), + range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), + range(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR), + range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) +; +---- +[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] + +# Ensure can coerce from other valid types +query ??????????? 
+select range(5), + range(2, 5), + range(2, 10, 3), + range(10, 2, -3), + range(arrow_cast(1, 'Int8'), 5, -1), + range(arrow_cast(1, 'Int16'), arrow_cast(-5, 'Int8'), 1), + range(arrow_cast(1, 'Int32'), arrow_cast(-5, 'Int16'), arrow_cast(-1, 'Int8')), + range(DATE '1992-09-01', DATE '1993-03-01', arrow_cast('1 MONTH', 'Interval(YearMonth)')), + range(DATE '1993-02-01', arrow_cast(DATE '1993-01-01', 'Date64'), INTERVAL '-1' DAY), + range(arrow_cast(DATE '1989-04-01', 'Date64'), DATE '1993-03-01', INTERVAL '1' YEAR), + range(arrow_cast(DATE '1993-03-01', 'Date64'), arrow_cast(DATE '1989-04-01', 'Date64'), INTERVAL '1' YEAR) +; +---- +[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] + +# Test range with zero step +query error DataFusion error: Execution error: step can't be 0 for function range\(start \[, stop, step\]\) +select range(1, 1, 0); + +# Test range with big steps +query ???? +select + range(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, + range(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, + range(0, -9223372036854775808, -9223372036854775808) as c3, + range(0, 9223372036854775807, 9223372036854775807) as c4; +---- +[] [] [0] [0] + +# Test range for other edge cases +query ???????? 
+select + range(9223372036854775807, 9223372036854775807, -1) as c1, + range(9223372036854775807, 9223372036854775806, -1) as c2, + range(9223372036854775807, 9223372036854775807, 1) as c3, + range(9223372036854775806, 9223372036854775807, 1) as c4, + range(-9223372036854775808, -9223372036854775808, -1) as c5, + range(-9223372036854775807, -9223372036854775808, -1) as c6, + range(-9223372036854775808, -9223372036854775808, 1) as c7, + range(-9223372036854775808, -9223372036854775807, 1) as c8; +---- +[] [9223372036854775807] [] [9223372036854775806] [] [-9223372036854775807] [] [-9223372036854775808] + +# Test range(start, stop, step) with NULL values +query ? +select range(start, stop, step) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop), + (values (3), (NULL)) as step_values(step) +where start is null or stop is null or step is null +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# Test range(start, stop) with NULL values +query ? +select range(start, stop) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop) +where start is null or stop is null +---- +NULL +NULL +NULL + +# Test range(stop) with NULL value +query ? +select range(NULL) +---- +NULL + +## should return NULL +query ? +select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +## should return NULL +query ? +select range(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +query ? +select range(DATE '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL + +query ? +select range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', NULL); +---- +NULL + +query ? +select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); +---- +NULL + +query ? 
+select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); +---- +NULL + +query ? +select range(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select range(NULL, NULL, NULL); +---- +NULL + +query ? +select range(NULL::timestamp, NULL::timestamp, NULL); +---- +NULL + +query ? +select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select range(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query ? +select range(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. +select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR) + +query ? +select range(TIMESTAMP '1993-03-01', TIMESTAMP '1993-03-01', INTERVAL '1' HOUR) +---- +[] + +query ????????? +select generate_series(5), + generate_series(2, 5), + generate_series(2, 10, 3), + generate_series(1, 5, 1), + generate_series(5, 1, -1), + generate_series(10, 2, -3), + generate_series(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), + generate_series(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR) +; +---- +[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] 
+ +query ? +select generate_series('2021-01-01'::timestamp, '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +# Other timestamp types are coerced to nanosecond +query ? +select generate_series(arrow_cast('2021-01-01'::timestamp, 'Timestamp(s)'), '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +query ? +select generate_series('2021-01-01'::timestamp, arrow_cast('2021-01-01T15:00:00'::timestamp, 'Timestamp(µs)'), INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +query ? 
+select generate_series('2021-01-01T00:00:00EST'::timestamp, '2021-01-01T15:00:00-12:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00, 2021-01-01T16:00:00, 2021-01-01T17:00:00, 2021-01-01T18:00:00, 2021-01-01T19:00:00, 2021-01-01T20:00:00, 2021-01-01T21:00:00, 2021-01-01T22:00:00, 2021-01-01T23:00:00, 2021-01-02T00:00:00, 2021-01-02T01:00:00, 2021-01-02T02:00:00, 2021-01-02T03:00:00] + +query ? +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T15:00:00', 'Timestamp(Nanosecond, Some("+05:00"))'), INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:00:00-05:00, 2021-01-01T02:00:00-05:00, 2021-01-01T03:00:00-05:00, 2021-01-01T04:00:00-05:00, 2021-01-01T05:00:00-05:00] + +## -5500000000 ns is -5.5 sec +query ? 
+select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T06:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), INTERVAL '1 HOUR 30 MINUTE -5500000000 NANOSECOND'); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00] + +## mixing types for timestamps is not supported +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR); + +## mixing types not allowed even if an argument is null +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL); + +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series(1, '2024-01-01', '2025-01-02'); + +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series('2024-01-01'::timestamp, '2025-01-02', interval '1 day'); + +## should return NULL +query ? +select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +## should return NULL +query ? +select generate_series(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL + +query ? +select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); +---- +NULL + +query ? 
+select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); +---- +NULL + +query ? +select generate_series(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(NULL, NULL, NULL); +---- +NULL + +query ? +select generate_series(NULL::timestamp, NULL::timestamp, NULL); +---- +NULL + +query ? +select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select generate_series(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query ? +select generate_series(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day. +select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR) + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day. +select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR) + +query ? +select generate_series(TIMESTAMP '2000-01-01', TIMESTAMP '2000-01-02', INTERVAL '1' HOUR) +---- +[2000-01-01T00:00:00, 2000-01-01T01:00:00, 2000-01-01T02:00:00, 2000-01-01T03:00:00, 2000-01-01T04:00:00, 2000-01-01T05:00:00, 2000-01-01T06:00:00, 2000-01-01T07:00:00, 2000-01-01T08:00:00, 2000-01-01T09:00:00, 2000-01-01T10:00:00, 2000-01-01T11:00:00, 2000-01-01T12:00:00, 2000-01-01T13:00:00, 2000-01-01T14:00:00, 2000-01-01T15:00:00, 2000-01-01T16:00:00, 2000-01-01T17:00:00, 2000-01-01T18:00:00, 2000-01-01T19:00:00, 2000-01-01T20:00:00, 2000-01-01T21:00:00, 2000-01-01T22:00:00, 2000-01-01T23:00:00, 2000-01-02T00:00:00] + +query ? 
+select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '-1' HOUR) +---- +[2000-01-02T00:00:00, 2000-01-01T23:00:00, 2000-01-01T22:00:00, 2000-01-01T21:00:00, 2000-01-01T20:00:00, 2000-01-01T19:00:00, 2000-01-01T18:00:00, 2000-01-01T17:00:00, 2000-01-01T16:00:00, 2000-01-01T15:00:00, 2000-01-01T14:00:00, 2000-01-01T13:00:00, 2000-01-01T12:00:00, 2000-01-01T11:00:00, 2000-01-01T10:00:00, 2000-01-01T09:00:00, 2000-01-01T08:00:00, 2000-01-01T07:00:00, 2000-01-01T06:00:00, 2000-01-01T05:00:00, 2000-01-01T04:00:00, 2000-01-01T03:00:00, 2000-01-01T02:00:00, 2000-01-01T01:00:00, 2000-01-01T00:00:00] + +# Test generate_series with small intervals +query ? +select generate_series('2000-01-01T00:00:00.000000001Z'::timestamp, '2000-01-01T00:00:00.00000001Z'::timestamp, INTERVAL '1' NANOSECONDS) +---- +[2000-01-01T00:00:00.000000001, 2000-01-01T00:00:00.000000002, 2000-01-01T00:00:00.000000003, 2000-01-01T00:00:00.000000004, 2000-01-01T00:00:00.000000005, 2000-01-01T00:00:00.000000006, 2000-01-01T00:00:00.000000007, 2000-01-01T00:00:00.000000008, 2000-01-01T00:00:00.000000009, 2000-01-01T00:00:00.000000010] + +# Test generate_series with zero step +query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\) +select generate_series(1, 1, 0); + +# Test generate_series with zero step +query error DataFusion error: Execution error: Interval argument to generate_series must not be 0 +select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); + +# Test generate_series with big steps +query ???? 
+select + generate_series(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, + generate_series(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, + generate_series(0, -9223372036854775808, -9223372036854775808) as c3, + generate_series(0, 9223372036854775807, 9223372036854775807) as c4; +---- +[-9223372036854775808] [9223372036854775807] [0, -9223372036854775808] [0, 9223372036854775807] + + +# Test generate_series for other edge cases +query ???? +select + generate_series(9223372036854775807, 9223372036854775807, -1) as c1, + generate_series(9223372036854775807, 9223372036854775807, 1) as c2, + generate_series(-9223372036854775808, -9223372036854775808, -1) as c3, + generate_series(-9223372036854775808, -9223372036854775808, 1) as c4; +---- +[9223372036854775807] [9223372036854775807] [-9223372036854775808] [-9223372036854775808] + +# Test generate_series(start, stop, step) with NULL values +query ? +select generate_series(start, stop, step) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop), + (values (3), (NULL)) as step_values(step) +where start is null or stop is null or step is null +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# Test generate_series(start, stop) with NULL values +query ? +select generate_series(start, stop) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop) +where start is null or stop is null +---- +NULL +NULL +NULL + +# Test generate_series(stop) with NULL value +query ? +select generate_series(NULL) +---- +NULL + +# Test generate_series with a table of date values +statement ok +CREATE TABLE date_table( + start DATE, + stop DATE, + step INTERVAL +) AS VALUES + (DATE '1992-01-01', DATE '1993-01-02', INTERVAL '1' MONTH), + (DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + (DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR); + +query ? 
+select generate_series(start, stop, step) from date_table; +---- +[1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-05-01, 1992-06-01, 1992-07-01, 1992-08-01, 1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01] +[1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +query ? +select generate_series(start, stop, INTERVAL '1 year') from date_table; +---- +[1992-01-01, 1993-01-01] +[] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +query ? +select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_table; +---- +[1992-01-01, 1993-01-01] +[1993-02-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +# Test generate_series with a table of timestamp values +statement ok +CREATE TABLE timestamp_table( + start TIMESTAMP, + stop TIMESTAMP, + step INTERVAL +) AS VALUES + (TIMESTAMP '1992-01-01T00:00:00', TIMESTAMP '1993-01-02T00:00:00', INTERVAL '1' MONTH), + (TIMESTAMP '1993-02-01T00:00:00', TIMESTAMP '1993-01-01T00:00:00', INTERVAL '-1' DAY), + (TIMESTAMP '1989-04-01T00:00:00', TIMESTAMP '1993-03-01T00:00:00', INTERVAL '1' YEAR); + +query ? 
+select generate_series(start, stop, step) from timestamp_table; +---- +[1992-01-01T00:00:00, 1992-02-01T00:00:00, 1992-03-01T00:00:00, 1992-04-01T00:00:00, 1992-05-01T00:00:00, 1992-06-01T00:00:00, 1992-07-01T00:00:00, 1992-08-01T00:00:00, 1992-09-01T00:00:00, 1992-10-01T00:00:00, 1992-11-01T00:00:00, 1992-12-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00, 1993-01-31T00:00:00, 1993-01-30T00:00:00, 1993-01-29T00:00:00, 1993-01-28T00:00:00, 1993-01-27T00:00:00, 1993-01-26T00:00:00, 1993-01-25T00:00:00, 1993-01-24T00:00:00, 1993-01-23T00:00:00, 1993-01-22T00:00:00, 1993-01-21T00:00:00, 1993-01-20T00:00:00, 1993-01-19T00:00:00, 1993-01-18T00:00:00, 1993-01-17T00:00:00, 1993-01-16T00:00:00, 1993-01-15T00:00:00, 1993-01-14T00:00:00, 1993-01-13T00:00:00, 1993-01-12T00:00:00, 1993-01-11T00:00:00, 1993-01-10T00:00:00, 1993-01-09T00:00:00, 1993-01-08T00:00:00, 1993-01-07T00:00:00, 1993-01-06T00:00:00, 1993-01-05T00:00:00, 1993-01-04T00:00:00, 1993-01-03T00:00:00, 1993-01-02T00:00:00, 1993-01-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +query ? +select generate_series(start, stop, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +query ? +select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +# https://github.com/apache/datafusion/issues/11922 +query ? 
+select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +## array_except + +statement ok +CREATE TABLE array_except_table +AS VALUES + ([1, 2, 2, 3], [2, 3, 4]), + ([2, 3, 3], [3]), + ([3], [3, 3, 4]), + (null, [3, 4]), + ([1, 2], null), + (null, null) +; + +query ? +select array_except(column1, column2) from array_except_table; +---- +[1] +[2] +[] +NULL +NULL +NULL + +statement ok +drop table array_except_table; + +statement ok +CREATE TABLE array_except_nested_list_table +AS VALUES + ([[1, 2], [3]], [[2], [3], [4, 5]]), + ([[1, 2], [3]], [[2], [1, 2]]), + ([[1, 2], [3]], null), + (null, [[1], [2, 3], [4, 5, 6]]), + ([[1], [2, 3], [4, 5, 6]], [[2, 3], [4, 5, 6], [1]]) +; + +query ? +select array_except(column1, column2) from array_except_nested_list_table; +---- +[[1, 2]] +[[3]] +NULL +NULL +[] + +statement ok +drop table array_except_nested_list_table; + +statement ok +CREATE TABLE array_except_table_float +AS VALUES + ([1.1, 2.2, 3.3], [2.2]), + ([1.1, 2.2, 3.3], [4.4]), + ([1.1, 2.2, 3.3], [3.3, 2.2, 1.1]) +; + +query ? +select array_except(column1, column2) from array_except_table_float; +---- +[1.1, 3.3] +[1.1, 2.2, 3.3] +[] + +statement ok +drop table array_except_table_float; + +statement ok +CREATE TABLE array_except_table_ut8 +AS VALUES + (['a', 'b', 'c'], ['a']), + (['a', 'bc', 'def'], ['g', 'def']), + (['a', 'bc', 'def'], null), + (null, ['a']) +; + +query ? +select array_except(column1, column2) from array_except_table_ut8; +---- +[b, c] +[a, bc] +NULL +NULL + +statement ok +drop table array_except_table_ut8; + +statement ok +CREATE TABLE array_except_table_bool +AS VALUES + ([true, false, false], [false]), + ([true, true, true], [false]), + ([false, false, false], [true]), + ([true, false], null), + (null, [true, false]) +; + +query ? 
+select array_except(column1, column2) from array_except_table_bool; +---- +[true] +[true] +[false] +NULL +NULL + +statement ok +drop table array_except_table_bool; + +query ? +select array_except([], null); +---- +NULL + +query ? +select array_except([], []); +---- +[] + +query ? +select array_except(null, []); +---- +NULL + +query ? +select array_except(null, null) +---- +NULL + +query ? +select array_except(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); +---- +[1, 2] + +query ? +select array_except(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); +---- +[1, 2] + +### Array operators tests + + +## array concatenate operator + +# array concatenate operator with scalars #1 (like array_concat scalar function) +query ?? +select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array concatenate operator with scalars #2 (like array_append scalar function) +query ??? +select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array concatenate operator with scalars #3 (like array_prepend scalar function) +query ??? +select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array concatenate operator with scalars #4 (mixed) +query ? +select 0 || [1,2,3] || 4 || [5] || [6,7]; +---- +[0, 1, 2, 3, 4, 5, 6, 7] + +# array concatenate operator with nd-list #5 (mixed) +query ? 
+select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10]; +---- +[[0, 1, 2, 3], [4, 5], [6, 7, 8], [9, 10]] + +# array concatenate operator non-valid cases +## concat 2D with scalar is not valid +query error +select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10] || 11; + +## concat scalar with 2D is not valid +query error +select 0 || [[1,2,3]]; + +# array concatenate operator with column + +statement ok +CREATE TABLE array_concat_operator_table +AS VALUES + (0, [1, 2, 2, 3], 4, [5, 6, 5]), + (-1, [4, 5, 6], 7, [8, 1, 1]) +; + +query ? +select column1 || column2 || column3 || column4 from array_concat_operator_table; +---- +[0, 1, 2, 2, 3, 4, 5, 6, 5] +[-1, 4, 5, 6, 7, 8, 1, 1] + +statement ok +drop table array_concat_operator_table; + +## array containment operator + +# array containment operator with scalars #1 (at arrow) +query BBBBBBB +select make_array(1,2,3) @> make_array(1,3), + make_array(1,2,3) @> make_array(1,4), + make_array([1,2], [3,4]) @> make_array([1,2]), + make_array([1,2], [3,4]) @> make_array([1,3]), + make_array([1,2], [3,4]) @> make_array([1,2], [3,4], [5,6]), + make_array([[1,2,3]]) @> make_array([[1]]), + make_array([[1,2,3]]) @> make_array([[1,2,3]]); +---- +true false true false false false true + +# Make sure it is rewritten to function array_has_all() +query TT +explain select [1,2,3] @> [1,3]; +---- +logical_plan +01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) +02)--EmptyRelation: rows=1 +physical_plan +01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] +02)--PlaceholderRowExec + +# array containment operator with scalars #2 (arrow at) +query BBBBBBB +select make_array(1,3) <@ make_array(1,2,3), + make_array(1,4) <@ make_array(1,2,3), + make_array([1,2]) <@ make_array([1,2], [3,4]), + make_array([1,3]) <@ make_array([1,2], [3,4]), + make_array([1,2], [3,4], [5,6]) <@ make_array([1,2], [3,4]), + 
make_array([[1]]) <@ make_array([[1,2,3]]), + make_array([[1,2,3]]) <@ make_array([[1,2,3]]); +---- +true false true false false false true + +# Make sure it is rewritten to function array_has_all() +query TT +explain select [1,3] <@ [1,2,3]; +---- +logical_plan +01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) +02)--EmptyRelation: rows=1 +physical_plan +01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] +02)--PlaceholderRowExec + +### Array casting tests + + +## make_array + +# make_array scalar function #1 +query ? +select make_array(1, 2.0) +---- +[1.0, 2.0] + +# make_array scalar function #2 +query ? +select make_array(null, 1.0) +---- +[NULL, 1.0] + +# make_array scalar function #3 +query ? +select make_array(1, 2.0, null, 3) +---- +[1.0, 2.0, NULL, 3.0] + +# make_array scalar function #4 +query ? +select make_array(1.0, '2', null) +---- +[1.0, 2.0, NULL] + +### FixedSizeListArray + +statement ok +CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION '../core/tests/data/fixed_size_list_array.parquet'; + +query T +select arrow_typeof(f0) from fixed_size_list_array; +---- +FixedSizeList(2 x Int64) +FixedSizeList(2 x Int64) + +query ? +select * from fixed_size_list_array; +---- +[1, 2] +[3, 4] + +query ? +select f0 from fixed_size_list_array; +---- +[1, 2] +[3, 4] + +query ? +select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array; +---- +[1, 2] +[3, 4] + +query ? +select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array +---- +[[1, 2]] +[[3, 4]] + +query T +select arrow_typeof(make_array(arrow_cast(f0, 'List(Int64)'))) from fixed_size_list_array +---- +List(List(Int64)) +List(List(Int64)) + +query ? 
+select make_array(f0) from fixed_size_list_array +---- +[[1, 2]] +[[3, 4]] + +query T +select arrow_typeof(make_array(f0)) from fixed_size_list_array +---- +List(FixedSizeList(2 x Int64)) +List(FixedSizeList(2 x Int64)) + +query ? +select array_concat(column1, [7]) from arrays_values_v2; +---- +[NULL, 2, 3, 7] +[7] +[9, NULL, 10, 7] +[NULL, 1, 7] +[11, 12, 7] +[7] + +# flatten + +query ? +select flatten(NULL); +---- +NULL + +# flatten with scalar values #1 +query ??? +select flatten(make_array(1, 2, 1, 3, 2)), + flatten(make_array([1], [2, 3], [null], make_array(4, null, 5))), + flatten(make_array([[1.1]], [[2.2]], [[3.3], [4.4]])); +---- +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] + +query ??? +select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), + flatten(arrow_cast(make_array([1], null, [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), + flatten(arrow_cast(make_array([[1.1]], [[2.2]], [[3.3], [4.4]]), 'LargeList(LargeList(LargeList(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] + +query ??? 
+select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), + flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] + +query ??TT +select flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))')), + arrow_typeof(flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))'))), + arrow_typeof(flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))'))); +---- +[1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] LargeList(Int64) LargeList(FixedSizeList(1 x Float64)) + +# flatten with column values +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from flatten_table; +---- +[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from large_flatten_table; +---- +[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from fixed_size_flatten_table; +---- +[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [[8], [9, 10], [11, 12, 13]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +# flatten with different inner list type +query ?????? 
+select flatten(arrow_cast(make_array([1, 2], [3, 4]), 'List(FixedSizeList(2, Int64))')), + flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'List(FixedSizeList(1, List(Int64)))')), + flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), + flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(List(List(Int64)))')), + flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(FixedSizeList(2, Int64))')), + flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(FixedSizeList(1, List(Int64)))')) +---- +[1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] + +## empty (aliases: `array_empty`, `list_empty`) +# empty scalar function #1 +query B +select empty(make_array(1)); +---- +false + +query B +select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +false + +query B +select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +false + +# empty scalar function #2 +query B +select empty(make_array()); +---- +true + +query B +select empty(arrow_cast(make_array(), 'LargeList(Int64)')); +---- +true + +#TODO: https://github.com/apache/datafusion/issues/9158 +#query B +#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); +#---- +#true + +# empty scalar function #3 +query B +select empty(make_array(NULL)); +---- +false + +query B +select empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); +---- +false + +query B +select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')); +---- +false + +#TODO: https://github.com/apache/datafusion/issues/7142 +# empty scalar function #4 +#query B +#select empty(NULL); +#---- +#NULL + +# empty scalar function #5 +query B +select empty(column1) from arrays; +---- +false +false +false +false +NULL +false +false + +query B +select empty(arrow_cast(column1, 'LargeList(List(Int64))')) from arrays; +---- +false +false +false +false +NULL +false +false + +query B +select empty(column1) from fixed_size_arrays; +---- +false 
+false +false +false +NULL +false +false + +## array_empty (aliases: `empty`, `list_empty`) +# array_empty scalar function #1 +query B +select array_empty(make_array(1)); +---- +false + +query B +select array_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +false + +# array_empty scalar function #2 +query B +select array_empty(make_array()); +---- +true + +query B +select array_empty(arrow_cast(make_array(), 'LargeList(Int64)')); +---- +true + +# array_empty scalar function #3 +query B +select array_empty(make_array(NULL)); +---- +false + +query B +select array_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); +---- +false + +## list_empty (aliases: `empty`, `array_empty`) +# list_empty scalar function #1 +query B +select list_empty(make_array(1)); +---- +false + +query B +select list_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +false + +# list_empty scalar function #2 +query B +select list_empty(make_array()); +---- +true + +query B +select list_empty(arrow_cast(make_array(), 'LargeList(Int64)')); +---- +true + +# list_empty scalar function #3 +query B +select list_empty(make_array(NULL)); +---- +false + +query B +select list_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); +---- +false + +# string_to_array scalar function +query ? +SELECT string_to_array('abcxxxdef', 'xxx') +---- +[abc, def] + +query I +SELECT cardinality(string_to_array('', ',')) +---- +0 + +query I +SELECT cardinality(string_to_array('', '')) +---- +0 + +query I +SELECT cardinality(string_to_array('', ',', 'x')) +---- +0 + +query I +SELECT cardinality(string_to_array('', '', 'x')) +---- +0 + +query ? +SELECT string_to_array('abc', '') +---- +[abc] + +query ? +SELECT string_to_array('abc', NULL) +---- +[a, b, c] + +query ? +SELECT string_to_array('abc def', ' ', 'def') +---- +[abc, NULL] + +query ? 
+select string_to_array(e, ',') from values; +---- +[Lorem] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +# large string tests for string_to_array + +# string_to_array scalar function +query ? +SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), 'xxx') +---- +[abc, def] + +# string_to_array scalar function +query ? +SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), arrow_cast('xxx', 'LargeUtf8')) +---- +[abc, def] + +query ? +SELECT string_to_array(arrow_cast('abc', 'LargeUtf8'), NULL) +---- +[a, b, c] + +query ? +select string_to_array(arrow_cast(e, 'LargeUtf8'), ',') from values; +---- +[Lorem] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +query ? +select string_to_array(arrow_cast(e, 'LargeUtf8'), ',', arrow_cast('Lorem', 'LargeUtf8')) from values; +---- +[NULL] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +# string view tests for string_to_array + +# string_to_array scalar function +query ? +SELECT string_to_array(arrow_cast('abcxxxdef', 'Utf8View'), 'xxx') +---- +[abc, def] + +query ? +SELECT string_to_array(arrow_cast('abc', 'Utf8View'), NULL) +---- +[a, b, c] + +query ? +select string_to_array(arrow_cast(e, 'Utf8View'), ',') from values; +---- +[Lorem] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +# test string_to_array aliases + +query ? +select string_to_list(e, 'm') from values; +---- +[Lore, ] +[ipsu, ] +[dolor] +[sit] +[a, et] +[,] +[consectetur] +[adipiscing] +NULL + +# string_to_array: single-char delimiter producing multiple elements +query ? +SELECT string_to_array('a,b,c', ',') +---- +[a, b, c] + +# string_to_array: delimiter not found in input +query ? +SELECT string_to_array('abc', ',') +---- +[abc] + +# string_to_array: empty string input +query ? +SELECT string_to_array('', ',') +---- +[] + +# string_to_array: null_str matching multiple elements +query ? 
+SELECT string_to_array('a,NULL,b,NULL,c', ',', 'NULL') +---- +[a, NULL, b, NULL, c] + +# string_to_array: null_str matching all elements +query ? +SELECT string_to_array('x,x,x', ',', 'x') +---- +[NULL, NULL, NULL] + +# string_to_array: null_str with empty-string delimiter +query ? +SELECT string_to_array('abc', '', 'abc') +---- +[NULL] + +# string_to_array: NULL string input +query ? +SELECT string_to_array(NULL, ',') +---- +NULL + +# string_to_array: columnar delimiter +query ?? +SELECT string_to_array('a,b,c', col1), string_to_array('a::b::c', col2) + FROM (VALUES (',', '::')) AS t(col1, col2) +---- +[a, b, c] [a, b, c] + +# string_to_array: columnar null_str +query ? +SELECT string_to_array('a,NULL,b', ',', col1) + FROM (VALUES ('NULL')) AS t(col1) +---- +[a, NULL, b] + +# string_to_array: adjacent delimiters produce empty strings +query ? +SELECT string_to_array('a,,b', ',') +---- +[a, , b] + +# string_to_array: delimiter at start and end +query ? +SELECT string_to_array(',a,b,', ',') +---- +[, a, b, ] + +# array_resize scalar function #1 +query ? +select array_resize(make_array(1, 2, 3), 1); +---- +[1] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1); +---- +[1] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1); +---- +[1] + +# array_resize scalar function #2 +query ? +select array_resize(make_array(1, 2, 3), 5); +---- +[1, 2, 3, NULL, NULL] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5); +---- +[1, 2, 3, NULL, NULL] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 5); +---- +[1, 2, 3, NULL, NULL] + +# array_resize scalar function #3 +query ? +select array_resize(make_array(1, 2, 3), 5, 4); +---- +[1, 2, 3, 4, 4] + +query ? 
+select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5, 4); +---- +[1, 2, 3, 4, 4] + +# array_resize scalar function #4 +query error +select array_resize(make_array(1, 2, 3), -5, 2); + +# array_resize scalar function #5 +query ? +select array_resize(make_array(1.1, 2.2, 3.3), 10, 9.9); +---- +[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] + +query ? +select array_resize(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), 10, 9.9); +---- +[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] + +# array_resize scalar function #5 +query ? +select array_resize(column1, column2, column3) from arrays_values; +---- +[NULL] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] +NULL +[] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] + +query ? 
+select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from arrays_values; +---- +[NULL] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] +NULL +[] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] + +# array_resize scalar function #5 +query ? +select array_resize([[1], [2], [3]], 10, [5]); +---- +[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] + +query ? +select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [5]); +---- +[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] + +# array_resize null value +query ? +select array_resize(arrow_cast(NULL, 'List(Int8)'), 1); +---- +NULL + +statement ok +CREATE TABLE array_resize_values +AS VALUES + (make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1), + (make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2), + (make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3), + (make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4), + (NULL, 3, 0), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), + (make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7) +; + +# array_resize columnar test #1 +query ? 
+select array_resize(column1, column2, column3) from array_resize_values; +---- +[1, NULL] +[11, 12, NULL, 14, 15] +[21, 22, 23, 24, NULL, 26, 27, 28] +[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + +# array_resize columnar test #2 +query ? +select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; +---- +[1, NULL] +[11, 12, NULL, 14, 15] +[21, 22, 23, 24, NULL, 26, 27, 28] +[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + +## array_reverse +query ?? +select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); +---- +[3, 2, 1] [1] + +query ?? +select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[3, 2, 1] [1] + +query ???? +select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), + array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')), + array_reverse(arrow_cast(make_array(1, NULL, 3), 'FixedSizeList(3, Int64)')), + array_reverse(arrow_cast(make_array(NULL, NULL, NULL), 'FixedSizeList(3, Int64)')); +---- +[3, 2, 1] [1] [3, NULL, 1] [NULL, NULL, NULL] + +query ?? +select array_reverse(NULL), array_reverse([]); +---- +NULL [] + +query ?? 
+select array_reverse(column1), column1 from arrays_values; +---- +[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[20, NULL, 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[30, 29, 28, 27, 26, 25, NULL, 23, 22, 21] [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[40, 39, 38, 37, NULL, 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +NULL NULL +[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[60, 59, 58, 57, 56, 55, 54, NULL, 52, 51] [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +statement ok +CREATE TABLE test_reverse_fixed_size AS VALUES + (arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)')), + (arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)')), + (arrow_cast([NULL, 8, 9], 'FixedSizeList(3, Int64)')), + (NULL); + +query ? +SELECT array_reverse(column1) FROM test_reverse_fixed_size; +---- +[3, 2, 1] +[6, 5, 4] +[9, 8, NULL] +NULL + +statement ok +DROP TABLE test_reverse_fixed_size; + +# Test defining a table with array columns +statement ok +create table test_create_array_table( + a int[], + b text[], + -- two-dimensional array + c int[][], + d int +); + +query I +insert into test_create_array_table values + ([1, 2, 3], ['a', 'b', 'c'], [[4,6], [6,7,8]], 1); +---- +1 + +query ???I +select * from test_create_array_table; +---- +[1, 2, 3] [a, b, c] [[4, 6], [6, 7, 8]] 1 + +query T +select arrow_typeof(a) from test_create_array_table; +---- +List(Int32) + +query T +select arrow_typeof(c) from test_create_array_table; +---- +List(List(Int32)) + +# Test casting to array types +# issue: https://github.com/apache/datafusion/issues/9440 +query ??T +select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]); +---- +[1, 2, 3] [[1]] List(Utf8View) + +# test empty arrays return length +# issue: https://github.com/apache/datafusion/pull/12459 +statement ok +create table values_all_empty (a 
int[]) as values ([]), ([]); + +query B +select array_has(a, 1) from values_all_empty; +---- +false +false + +# Test create table with fixed sized array +statement ok +create table fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5,6]); + +query T +select arrow_typeof(a) from fixed_size_col_table; +---- +FixedSizeList(3 x Int32) +FixedSizeList(3 x Int32) + +query ? rowsort +SELECT DISTINCT a FROM fixed_size_col_table +---- +[1, 2, 3] +[4, 5, 6] + +query ?I rowsort +SELECT a, count(*) FROM fixed_size_col_table GROUP BY a +---- +[1, 2, 3] 1 +[4, 5, 6] 1 + +statement error Cast error: Cannot cast to FixedSizeList\(3\): value at index 0 has length 2 +create table varying_fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5]); + +# https://github.com/apache/datafusion/issues/16187 +# should be NULL in case of out of bounds for Null Type +query ? +select [named_struct('a', 1, 'b', null)][-2]; +---- +NULL + +statement ok +COPY (select [[true, false], [false, true]] a, [false, true] b union select [[null, null]], null) to 'test_files/scratch/array/array_has/single_file.parquet' stored as parquet; + +statement ok +CREATE EXTERNAL TABLE array_has STORED AS PARQUET location 'test_files/scratch/array/array_has/single_file.parquet'; + +query B +select array_contains(a, b) from array_has order by 1 nulls last; +---- +true +NULL + +# Expected output (once supported): +# ---- +# [5, 4, 3, 2, 1] +query error +select array_reverse(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)')); + +### Delete tables + +statement ok +drop table values; + +statement ok +drop table values_without_nulls; + +statement ok +drop table nested_arrays; + +statement ok +drop table large_nested_arrays; + +statement ok +drop table fixed_size_nested_arrays; + +statement ok +drop table arrays; + +statement ok +drop table large_arrays; + +statement ok +drop table fixed_size_arrays; + +statement ok +drop table slices; + +statement ok +drop table fixed_slices; + +statement ok +drop table 
arrayspop; + +statement ok +drop table large_arrayspop; + +statement ok +drop table arrays_values; + +statement ok +drop table arrays_values_v2; + +statement ok +drop table large_arrays_values_v2; + +statement ok +drop table array_has_table_1D; + +statement ok +drop table array_has_table_1D_Float; + +statement ok +drop table array_has_table_1D_Boolean; + +statement ok +drop table array_has_table_1D_UTF8; + +statement ok +drop table array_has_table_2D; + +statement ok +drop table array_has_table_2D_float; + +statement ok +drop table array_has_table_3D; + +statement ok +drop table array_intersect_table_1D; + +statement ok +drop table large_array_intersect_table_1D; + +statement ok +drop table array_intersect_table_1D_Float; + +statement ok +drop table large_array_intersect_table_1D_Float; + +statement ok +drop table array_intersect_table_1D_Boolean; + +statement ok +drop table large_array_intersect_table_1D_Boolean; + +statement ok +drop table array_intersect_table_1D_UTF8; + +statement ok +drop table large_array_intersect_table_1D_UTF8; + +statement ok +drop table array_intersect_table_2D; + +statement ok +drop table large_array_intersect_table_2D; + +statement ok +drop table array_intersect_table_2D_float; + +statement ok +drop table large_array_intersect_table_2D_float; + +statement ok +drop table array_intersect_table_3D; + +statement ok +drop table large_array_intersect_table_3D; + +statement ok +drop table fixed_size_array_has_table_1D; + +statement ok +drop table fixed_size_array_has_table_1D_Float; + +statement ok +drop table fixed_size_array_has_table_1D_Boolean; + +statement ok +drop table fixed_size_array_has_table_1D_UTF8; + +statement ok +drop table fixed_size_array_has_table_2D; + +statement ok +drop table fixed_size_array_has_table_2D_float; + +statement ok +drop table fixed_size_array_has_table_3D; + +statement ok +drop table arrays_range; + +statement ok +drop table arrays_with_repeating_elements; + +statement ok +drop table 
large_arrays_with_repeating_elements; + +statement ok +drop table fixed_arrays_with_repeating_elements; + +statement ok +drop table nested_arrays_with_repeating_elements; + +statement ok +drop table large_nested_arrays_with_repeating_elements; + +statement ok +drop table fixed_size_nested_arrays_with_repeating_elements; + +statement ok +drop table flatten_table; + +statement ok +drop table large_flatten_table; + +statement ok +drop table fixed_size_flatten_table; + +statement ok +drop table arrays_values_without_nulls; + +statement ok +drop table large_arrays_values_without_nulls; + +statement ok +drop table fixed_size_arrays_values_without_nulls; + +statement ok +drop table test_create_array_table; + +statement ok +drop table values_all_empty; + +statement ok +drop table fixed_size_col_table; + +statement ok +drop table array_has; From ff6a2188b33727bd5fa843c445f570cf80f1e836 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 00:46:31 +0200 Subject: [PATCH 32/70] Register cache from cachemanager at listing table --- datafusion/core/src/execution/context/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 5db36f1cd923b..28b6b461f724e 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1761,7 +1761,9 @@ impl SessionContext { let config = ListingTableConfig::new(table_path) .with_listing_options(options) .with_schema(resolved_schema); - let table = ListingTable::try_new(config)?.with_definition(sql_definition); + let table = ListingTable::try_new(config)?.with_definition(sql_definition).with_cache( + self.runtime_env().cache_manager.get_file_statistic_cache() + ); self.register_table(table_ref, Arc::new(table))?; Ok(()) } From 1926d4f4822a4cb327d87fe7129b21b2c20ba206 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 01:44:45 +0200 Subject: [PATCH 33/70] 
Revert slt --- datafusion/sqllogictest/test_files/array.slt | 4 ---- datafusion/sqllogictest/test_files/encrypted_parquet.slt | 4 ---- .../sqllogictest/test_files/parquet_filter_pushdown.slt | 3 --- .../sqllogictest/test_files/parquet_sorted_statistics.slt | 4 ---- 4 files changed, 15 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 45cf02700c39a..81d5c8f91a5bc 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -689,10 +689,6 @@ AS FROM arrays_distance_table ; -# Disable file statistics cache because file statistics have been previously created -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - # Array literal diff --git a/datafusion/sqllogictest/test_files/encrypted_parquet.slt b/datafusion/sqllogictest/test_files/encrypted_parquet.slt index fd375778b7a53..d580b7d1ad2b8 100644 --- a/datafusion/sqllogictest/test_files/encrypted_parquet.slt +++ b/datafusion/sqllogictest/test_files/encrypted_parquet.slt @@ -77,10 +77,6 @@ ORDER BY double_field 3 4 5 6 -# Disable file statistics cache because file statistics have been previously created -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - statement count 0 CREATE EXTERNAL TABLE parquet_table ( diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index 80a1a838cb7e9..85f9549357138 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -37,9 +37,6 @@ COPY ( ) TO 'test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet' STORED AS PARQUET; -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - ## Create table without filter pushdown ## (pushdown setting is part of the table, but is copied from the session settings) diff --git 
a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index 53ec7e72d9f16..a4a613e383ec8 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -177,10 +177,6 @@ physical_plan statement ok DROP TABLE test_table; -# Disable file statistics cache because file statistics have been previously created -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - statement ok CREATE EXTERNAL TABLE test_table ( partition_col TEXT NOT NULL, From 09a2591bffd0afc4164815f8d650bbca21a13b3b Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 18 Feb 2026 14:10:14 +0100 Subject: [PATCH 34/70] Add tablescoping for file stats cache --- datafusion/catalog-listing/src/helpers.rs | 3 +- datafusion/catalog-listing/src/table.rs | 6 +- datafusion/core/src/execution/context/mod.rs | 4 +- datafusion/datasource/src/mod.rs | 14 +- .../execution/src/cache/cache_manager.rs | 5 +- datafusion/execution/src/cache/cache_unit.rs | 141 ++++++++++++------ 6 files changed, 121 insertions(+), 52 deletions(-) diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 62257ec027c6f..0faa5834c19ab 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -367,7 +367,8 @@ fn try_into_partitioned_file( let mut pf: PartitionedFile = object_meta.into(); pf.partition_values = partition_values; - + pf.table_reference = table_path.get_table_ref().clone(); + Ok(Some(pf)) } diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 0ed2b452bc626..84d2b467524d8 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -799,12 +799,12 @@ impl ListingTable { ) -> datafusion_common::Result<(Arc, Option)> { use 
datafusion_execution::cache::cache_manager::CachedFileMetadata; - let path = &part_file.object_meta.location; + let path = TableScopedPath { table: part_file.table_reference.clone(), path : part_file.object_meta.location.clone()}; let meta = &part_file.object_meta; // Check cache first - if we have valid cached statistics and ordering if let Some(cache) = &self.collected_statistics - && let Some(cached) = cache.get(path) + && let Some(cached) = cache.get(&path) && cached.is_valid_for(meta) { // Return cached statistics and ordering @@ -823,7 +823,7 @@ impl ListingTable { // Store in cache if let Some(cache) = &self.collected_statistics { cache.put( - path, + &path, CachedFileMetadata::new( meta.clone(), Arc::clone(&statistics), diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 28b6b461f724e..df310bd8aa891 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1427,8 +1427,10 @@ impl SessionContext { schema.deregister_table(&table)?; if table_type == TableType::Base && let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() + && let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() { - lfc.drop_table_entries(&Some(table_ref))?; + lfc.drop_table_entries(&Some(table_ref.clone()))?; + fsc.drop_table_entries(&Some(table_ref.clone()))?; } return Ok(true); } diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index a9600271c28ce..575ffbb66fa3e 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -56,7 +56,7 @@ pub use self::url::ListingTableUrl; use crate::file_groups::FileGroup; use chrono::TimeZone; use datafusion_common::stats::Precision; -use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err}; +use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err, TableReference}; use datafusion_common::{ScalarValue, Statistics}; use 
datafusion_physical_expr::LexOrdering; use futures::{Stream, StreamExt}; @@ -152,6 +152,7 @@ pub struct PartitionedFile { pub extensions: Option>, /// The estimated size of the parquet metadata, in bytes pub metadata_size_hint: Option, + pub table_reference: Option, } impl PartitionedFile { @@ -171,6 +172,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None } } @@ -184,6 +186,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None } } @@ -203,6 +206,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None } .with_range(start, end) } @@ -214,6 +218,12 @@ impl PartitionedFile { self } + pub fn with_table_reference(mut self, table_reference: Option) -> Self { + self.table_reference = table_reference; + self + } + + /// Size of the file to be scanned (taking into account the range, if present). pub fn effective_size(&self) -> u64 { if let Some(range) = &self.range { @@ -340,6 +350,7 @@ impl From for PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None, } } } @@ -537,6 +548,7 @@ pub fn generate_test_files(num_files: usize, overlap_factor: f64) -> Vec { +pub trait FileStatisticsCache: CacheAccessor { /// Cache memory limit in bytes. fn cache_limit(&self) -> usize; @@ -104,6 +104,9 @@ pub trait FileStatisticsCache: CacheAccessor { /// Retrieves the information about the entries currently cached. 
fn list_entries(&self) -> HashMap; + + fn drop_table_entries(&self, table_ref: &Option) -> Result<()>; + } impl DFHeapSize for CachedFileMetadata { diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 1c1b6b9e6e692..7a5ddcdfedbfc 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::cache::CacheAccessor; +use crate::cache::{CacheAccessor, TableScopedPath}; use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; @@ -26,6 +26,7 @@ use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::TableReference; /// Default implementation of [`FileStatisticsCache`] /// @@ -65,7 +66,7 @@ impl DefaultFileStatisticsCache { } struct DefaultFileStatisticsCacheState { - lru_queue: LruQueue, + lru_queue: LruQueue, memory_limit: usize, memory_used: usize, } @@ -90,16 +91,16 @@ impl DefaultFileStatisticsCacheState { memory_used: 0, } } - fn get(&mut self, key: &Path) -> Option { + fn get(&mut self, key: &TableScopedPath) -> Option { self.lru_queue.get(key).cloned() } fn put( &mut self, - key: &Path, + key: &TableScopedPath, value: CachedFileMetadata, ) -> Option { - let key_size = key.as_ref().heap_size(); + let key_size = key.path.as_ref().heap_size(); let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { @@ -114,7 +115,7 @@ impl DefaultFileStatisticsCacheState { if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); } else { - self.memory_used += key.as_ref().heap_size(); + self.memory_used += key.path.as_ref().heap_size(); } self.evict_entries(); @@ -122,9 +123,9 @@ impl DefaultFileStatisticsCacheState { old_value } - fn remove(&mut self, 
k: &Path) -> Option { + fn remove(&mut self, k: &TableScopedPath) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.as_ref().heap_size(); + self.memory_used -= k.path.as_ref().heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -132,7 +133,7 @@ impl DefaultFileStatisticsCacheState { } } - fn contains_key(&self, k: &Path) -> bool { + fn contains_key(&self, k: &TableScopedPath) -> bool { self.lru_queue.contains_key(k) } @@ -148,7 +149,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.as_ref().heap_size(); + self.memory_used -= removed.0.path.as_ref().heap_size(); self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen @@ -168,23 +169,23 @@ impl DefaultFileStatisticsCacheState { } } } -impl CacheAccessor for DefaultFileStatisticsCache { - fn get(&self, key: &Path) -> Option { +impl CacheAccessor for DefaultFileStatisticsCache { + fn get(&self, key: &TableScopedPath) -> Option { let mut state = self.state.lock().unwrap(); state.get(key) } - fn put(&self, key: &Path, value: CachedFileMetadata) -> Option { + fn put(&self, key: &TableScopedPath, value: CachedFileMetadata) -> Option { let mut state = self.state.lock().unwrap(); state.put(key, value) } - fn remove(&self, key: &Path) -> Option { + fn remove(&self, key: &TableScopedPath) -> Option { let mut state = self.state.lock().unwrap(); state.remove(key) } - fn contains_key(&self, k: &Path) -> bool { + fn contains_key(&self, k: &TableScopedPath) -> bool { let state = self.state.lock().unwrap(); state.contains_key(k) } @@ -222,7 +223,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { let path = entry.0.clone(); let cached = entry.1.clone(); entries.insert( - path, + path.path, FileStatisticsCacheEntry { object_meta: 
cached.meta.clone(), num_rows: cached.statistics.num_rows, @@ -236,6 +237,20 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { entries } + + fn drop_table_entries(&self, table_ref: &Option) -> datafusion_common::Result<()> { + let mut state = self.state.lock().unwrap(); + let mut table_paths = vec![]; + for (path, _) in state.lru_queue.list_entries() { + if path.table == *table_ref { + table_paths.push(path.clone()); + } + } + for path in table_paths { + state.remove(&path); + } + Ok(()) + } } #[cfg(test)] @@ -279,8 +294,13 @@ mod tests { false, )]); + let path = TableScopedPath{ + path: meta.location.clone(), + table: None, + }; + // Cache miss - assert!(cache.get(&meta.location).is_none()); + assert!(cache.get(&path).is_none()); // Put a value let cached_value = CachedFileMetadata::new( @@ -288,17 +308,24 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, ); - cache.put(&meta.location, cached_value); + cache.put(&path, cached_value); // Cache hit - let result = cache.get(&meta.location); + let result = cache.get(&path); assert!(result.is_some()); let cached = result.unwrap(); assert!(cached.is_valid_for(&meta)); + // File size changed - validation should fail let meta2 = create_test_meta("test", 2048); - let cached = cache.get(&meta2.location).unwrap(); + + let path_2 = TableScopedPath{ + path: meta2.location.clone(), + table: None, + }; + + let cached = cache.get(&path_2).unwrap(); assert!(!cached.is_valid_for(&meta2)); // Update with new value @@ -307,12 +334,18 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, ); - cache.put(&meta2.location, cached_value2); + cache.put(&path_2, cached_value2); // Test list_entries let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - let entry = entries.get(&Path::from("test")).unwrap(); + + let path_3 = TableScopedPath{ + path: Path::from("test"), + table: None, + }; + + let entry = entries.get(&path_3.path).unwrap(); assert_eq!(entry.object_meta.size, 2048); // Should be 
updated value } @@ -379,31 +412,37 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, // No ordering yet ); - cache.put(&meta.location, cached_value); - let result = cache.get(&meta.location).unwrap(); + let path = TableScopedPath { + path: meta.location.clone(), + table: None, + }; + + cache.put(&path, cached_value); + + let result = cache.get(&path).unwrap(); assert!(result.ordering.is_none()); // Update to add ordering - let mut cached = cache.get(&meta.location).unwrap(); + let mut cached = cache.get(&path).unwrap(); if cached.is_valid_for(&meta) && cached.ordering.is_none() { cached.ordering = Some(ordering()); } - cache.put(&meta.location, cached); + cache.put(&path, cached); - let result2 = cache.get(&meta.location).unwrap(); + let result2 = cache.get(&path).unwrap(); assert!(result2.ordering.is_some()); // Verify list_entries shows has_ordering = true let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - assert!(entries.get(&meta.location).unwrap().has_ordering); + assert!(entries.get(&path.path).unwrap().has_ordering); } #[test] fn test_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = Path::from("test.parquet"); + let path = TableScopedPath { table: None, path : Path::from("test.parquet"), }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let meta_v1 = create_test_meta("test.parquet", 100); @@ -439,12 +478,12 @@ mod tests { #[test] fn test_ordering_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = Path::from("test.parquet"); + let path = TableScopedPath { path: Path::from("test.parquet"), table: None }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); // Cache with original metadata and ordering let meta_v1 = ObjectMeta { - location: path.clone(), + location: path.path.clone(), last_modified: DateTime::parse_from_rfc3339("2022-09-27T22:36:00+02:00") .unwrap() 
.into(), @@ -467,7 +506,7 @@ mod tests { // File modified (size changed) let meta_v2 = ObjectMeta { - location: path.clone(), + location: path.path.clone(), last_modified: DateTime::parse_from_rfc3339("2022-09-28T10:00:00+02:00") .unwrap() .into(), @@ -510,14 +549,20 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, ); - cache.put(&meta1.location, cached_value); + + let path_1 = TableScopedPath { path: meta1.location.clone(), table: None }; + + cache.put(&path_1, cached_value); let meta2 = create_test_meta("test2.parquet", 200); let cached_value = CachedFileMetadata::new( meta2.clone(), Arc::new(Statistics::new_unknown(&schema)), Some(ordering()), ); - cache.put(&meta2.location, cached_value); + + let path_2 = TableScopedPath { path: meta2.location.clone(), table: None }; + + cache.put(&path_2, cached_value); let entries = cache.list_entries(); assert_eq!( @@ -562,33 +607,37 @@ mod tests { // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); - - cache.put(&meta_1.location, value_1.clone()); - cache.put(&meta_2.location, value_2.clone()); + let path_1 = TableScopedPath { path: meta_1.location.clone(), table: None }; + let path_2 = TableScopedPath { path: meta_2.location.clone(), table: None }; + cache.put(&path_1, value_1.clone()); + cache.put(&path_2, value_2.clone()); assert_eq!(cache.len(), 2); assert_eq!(cache.memory_used(), limit_for_2_entries); - let result_1 = cache.get(&meta_1.location); - let result_2 = cache.get(&meta_2.location); + let result_1 = cache.get(&path_1); + let result_2 = cache.get(&path_2); assert_eq!(result_1.unwrap(), value_1); assert_eq!(result_2.unwrap(), value_2); + let path_3 = TableScopedPath { path: meta_3.location.clone(), table: None }; + + // adding the third entry evicts the first entry - cache.put(&meta_3.location, value_3.clone()); + cache.put(&path_3, value_3.clone()); assert_eq!(cache.len(), 2); assert_eq!(cache.memory_used(), 
limit_for_2_entries); - let result_1 = cache.get(&meta_1.location); + let result_1 = cache.get(&path_1); assert!(result_1.is_none()); - let result_2 = cache.get(&meta_2.location); - let result_3 = cache.get(&meta_3.location); + let result_2 = cache.get(&path_2); + let result_3 = cache.get(&path_3); assert_eq!(result_2.unwrap(), value_2); assert_eq!(result_3.unwrap(), value_3); - cache.remove(&meta_2.location); + cache.remove(&path_2); assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), @@ -609,7 +658,9 @@ mod tests { // create a cache with a size less than the entry let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); - cache.put(&meta.location, value); + let path_1 = TableScopedPath { path: meta.location.clone(), table: None }; + + cache.put(&path_1, value); assert_eq!(cache.len(), 0); assert_eq!(cache.memory_used(), 0); From 5c524e00500c54fcdc3fd668c4b88e1980ceca13 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 11:35:48 +0200 Subject: [PATCH 35/70] Adapt slt --- datafusion/sqllogictest/test_files/encrypted_parquet.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/encrypted_parquet.slt b/datafusion/sqllogictest/test_files/encrypted_parquet.slt index d580b7d1ad2b8..f51d84f0c88eb 100644 --- a/datafusion/sqllogictest/test_files/encrypted_parquet.slt +++ b/datafusion/sqllogictest/test_files/encrypted_parquet.slt @@ -85,5 +85,5 @@ float_field float ) STORED AS PARQUET LOCATION 'test_files/scratch/encrypted_parquet/' -query error DataFusion error: Parquet error: Parquet error: Parquet file has an encrypted footer but decryption properties were not provided +query error Parquet error: Parquet error: Parquet file has an encrypted footer but decryption properties were not provided SELECT * FROM parquet_table From 5f970c610913841ed5d6d86941e334b6c7c157b3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 12:35:07 +0200 Subject: [PATCH 36/70] Fix linter --- 
datafusion-cli/src/main.rs | 5 +- datafusion/catalog-listing/src/helpers.rs | 2 +- datafusion/catalog-listing/src/table.rs | 5 +- datafusion/core/src/execution/context/mod.rs | 11 ++-- datafusion/datasource/src/mod.rs | 14 ++-- .../execution/src/cache/cache_manager.rs | 5 +- datafusion/execution/src/cache/cache_unit.rs | 66 ++++++++++++++----- 7 files changed, 72 insertions(+), 36 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 1909ed392afd3..a2ec8445e9437 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -443,10 +443,7 @@ mod tests { use super::*; use datafusion::{ common::test_util::batches_to_string, - execution::cache::{ - DefaultListFilesCache, cache_manager::CacheManagerConfig, - cache_unit::DefaultFileStatisticsCache, - }, + execution::cache::{DefaultListFilesCache, cache_manager::CacheManagerConfig}, prelude::{ParquetReadOptions, col, lit, split_part}, }; use insta::assert_snapshot; diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 0faa5834c19ab..c22beba295ed7 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -368,7 +368,7 @@ fn try_into_partitioned_file( let mut pf: PartitionedFile = object_meta.into(); pf.partition_values = partition_values; pf.table_reference = table_path.get_table_ref().clone(); - + Ok(Some(pf)) } diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 84d2b467524d8..7ee743a6abe71 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -799,7 +799,10 @@ impl ListingTable { ) -> datafusion_common::Result<(Arc, Option)> { use datafusion_execution::cache::cache_manager::CachedFileMetadata; - let path = TableScopedPath { table: part_file.table_reference.clone(), path : part_file.object_meta.location.clone()}; + let path = TableScopedPath { + table: 
part_file.table_reference.clone(), + path: part_file.object_meta.location.clone(), + }; let meta = &part_file.object_meta; // Check cache first - if we have valid cached statistics and ordering diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index df310bd8aa891..903548dda3c3e 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1184,7 +1184,7 @@ impl SessionContext { builder.with_object_list_cache_ttl(Some(duration)) } "file_statistics_cache_limit" => { - let limit = Self::parse_memory_limit(value)?; + let limit = Self::parse_capacity_limit(variable, value)?; builder.with_file_statistics_cache_limit(limit) } _ => return plan_err!("Unknown runtime configuration: {variable}"), @@ -1427,7 +1427,8 @@ impl SessionContext { schema.deregister_table(&table)?; if table_type == TableType::Base && let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() - && let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() + && let Some(fsc) = + self.runtime_env().cache_manager.get_file_statistic_cache() { lfc.drop_table_entries(&Some(table_ref.clone()))?; fsc.drop_table_entries(&Some(table_ref.clone()))?; @@ -1763,9 +1764,9 @@ impl SessionContext { let config = ListingTableConfig::new(table_path) .with_listing_options(options) .with_schema(resolved_schema); - let table = ListingTable::try_new(config)?.with_definition(sql_definition).with_cache( - self.runtime_env().cache_manager.get_file_statistic_cache() - ); + let table = ListingTable::try_new(config)? 
+ .with_definition(sql_definition) + .with_cache(self.runtime_env().cache_manager.get_file_statistic_cache()); self.register_table(table_ref, Arc::new(table))?; Ok(()) } diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 575ffbb66fa3e..d971762782258 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -56,7 +56,7 @@ pub use self::url::ListingTableUrl; use crate::file_groups::FileGroup; use chrono::TimeZone; use datafusion_common::stats::Precision; -use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err, TableReference}; +use datafusion_common::{ColumnStatistics, Result, TableReference, exec_datafusion_err}; use datafusion_common::{ScalarValue, Statistics}; use datafusion_physical_expr::LexOrdering; use futures::{Stream, StreamExt}; @@ -172,7 +172,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, - table_reference: None + table_reference: None, } } @@ -186,7 +186,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, - table_reference: None + table_reference: None, } } @@ -206,7 +206,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, - table_reference: None + table_reference: None, } .with_range(start, end) } @@ -218,12 +218,14 @@ impl PartitionedFile { self } - pub fn with_table_reference(mut self, table_reference: Option) -> Self { + pub fn with_table_reference( + mut self, + table_reference: Option, + ) -> Self { self.table_reference = table_reference; self } - /// Size of the file to be scanned (taking into account the range, if present). 
pub fn effective_size(&self) -> u64 { if let Some(range) = &self.range { diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 32f2efcaeebbb..89c9d6a314be4 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -95,7 +95,9 @@ impl CachedFileMetadata { /// 3. If invalid or missing, compute new value and call `put(path, new_value)` /// /// See [`crate::runtime_env::RuntimeEnv`] for more details -pub trait FileStatisticsCache: CacheAccessor { +pub trait FileStatisticsCache: + CacheAccessor +{ /// Cache memory limit in bytes. fn cache_limit(&self) -> usize; @@ -106,7 +108,6 @@ pub trait FileStatisticsCache: CacheAccessor HashMap; fn drop_table_entries(&self, table_ref: &Option) -> Result<()>; - } impl DFHeapSize for CachedFileMetadata { diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 7a5ddcdfedbfc..0d3cba519ad92 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -15,18 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-use crate::cache::{CacheAccessor, TableScopedPath}; use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; +use crate::cache::{CacheAccessor, TableScopedPath}; use object_store::path::Path; use std::collections::HashMap; use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; -use datafusion_common::heap_size::DFHeapSize; use datafusion_common::TableReference; +use datafusion_common::heap_size::DFHeapSize; /// Default implementation of [`FileStatisticsCache`] /// @@ -175,7 +175,11 @@ impl CacheAccessor for DefaultFileStatistic state.get(key) } - fn put(&self, key: &TableScopedPath, value: CachedFileMetadata) -> Option { + fn put( + &self, + key: &TableScopedPath, + value: CachedFileMetadata, + ) -> Option { let mut state = self.state.lock().unwrap(); state.put(key, value) } @@ -238,7 +242,10 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { entries } - fn drop_table_entries(&self, table_ref: &Option) -> datafusion_common::Result<()> { + fn drop_table_entries( + &self, + table_ref: &Option, + ) -> datafusion_common::Result<()> { let mut state = self.state.lock().unwrap(); let mut table_paths = vec![]; for (path, _) in state.lru_queue.list_entries() { @@ -294,7 +301,7 @@ mod tests { false, )]); - let path = TableScopedPath{ + let path = TableScopedPath { path: meta.location.clone(), table: None, }; @@ -313,14 +320,14 @@ mod tests { // Cache hit let result = cache.get(&path); assert!(result.is_some()); + let cached = result.unwrap(); assert!(cached.is_valid_for(&meta)); - // File size changed - validation should fail let meta2 = create_test_meta("test", 2048); - let path_2 = TableScopedPath{ + let path_2 = TableScopedPath { path: meta2.location.clone(), table: None, }; @@ -340,7 +347,7 @@ mod tests { let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - let path_3 = TableScopedPath{ + let path_3 = TableScopedPath { path: Path::from("test"), 
table: None, }; @@ -442,7 +449,10 @@ mod tests { #[test] fn test_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = TableScopedPath { table: None, path : Path::from("test.parquet"), }; + let path = TableScopedPath { + path: Path::from("test.parquet"), + table: None, + }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let meta_v1 = create_test_meta("test.parquet", 100); @@ -478,7 +488,10 @@ mod tests { #[test] fn test_ordering_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = TableScopedPath { path: Path::from("test.parquet"), table: None }; + let path = TableScopedPath { + path: Path::from("test.parquet"), + table: None, + }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); // Cache with original metadata and ordering @@ -550,7 +563,10 @@ mod tests { None, ); - let path_1 = TableScopedPath { path: meta1.location.clone(), table: None }; + let path_1 = TableScopedPath { + path: meta1.location.clone(), + table: None, + }; cache.put(&path_1, cached_value); let meta2 = create_test_meta("test2.parquet", 200); @@ -560,7 +576,10 @@ mod tests { Some(ordering()), ); - let path_2 = TableScopedPath { path: meta2.location.clone(), table: None }; + let path_2 = TableScopedPath { + path: meta2.location.clone(), + table: None, + }; cache.put(&path_2, cached_value); @@ -607,8 +626,16 @@ mod tests { // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); - let path_1 = TableScopedPath { path: meta_1.location.clone(), table: None }; - let path_2 = TableScopedPath { path: meta_2.location.clone(), table: None }; + let path_1 = TableScopedPath { + path: meta_1.location.clone(), + table: None, + }; + + let path_2 = TableScopedPath { + path: meta_2.location.clone(), + table: None, + }; + cache.put(&path_1, value_1.clone()); cache.put(&path_2, 
value_2.clone()); @@ -620,8 +647,10 @@ mod tests { assert_eq!(result_1.unwrap(), value_1); assert_eq!(result_2.unwrap(), value_2); - let path_3 = TableScopedPath { path: meta_3.location.clone(), table: None }; - + let path_3 = TableScopedPath { + path: meta_3.location.clone(), + table: None, + }; // adding the third entry evicts the first entry cache.put(&path_3, value_3.clone()); @@ -658,7 +687,10 @@ mod tests { // create a cache with a size less than the entry let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); - let path_1 = TableScopedPath { path: meta.location.clone(), table: None }; + let path_1 = TableScopedPath { + path: meta.location.clone(), + table: None, + }; cache.put(&path_1, value); From 530873b54af00489f59835c0bebbdb554603647b Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 12:40:03 +0200 Subject: [PATCH 37/70] Remove uneeded clone --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 0d3cba519ad92..d008a626170a4 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -225,7 +225,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { let mut entries = HashMap::::new(); for entry in self.state.lock().unwrap().lru_queue.list_entries() { let path = entry.0.clone(); - let cached = entry.1.clone(); + let cached = entry.1; entries.insert( path.path, FileStatisticsCacheEntry { From 08c29da960175d7f2a725c8a79d372130cb8d3e1 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 12:44:35 +0200 Subject: [PATCH 38/70] Rename cache_unit to file_statistics_cache --- datafusion/core/src/datasource/listing_table_factory.rs | 2 +- datafusion/core/src/execution/context/mod.rs | 2 +- datafusion/core/tests/parquet/file_statistics.rs | 2 +- datafusion/core/tests/sql/runtime_config.rs | 2 +- 
datafusion/execution/src/cache/cache_manager.rs | 2 +- .../src/cache/{cache_unit.rs => file_statistics_cache.rs} | 0 datafusion/execution/src/cache/mod.rs | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename datafusion/execution/src/cache/{cache_unit.rs => file_statistics_cache.rs} (100%) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 80ca15386308e..d8d1d543c75e9 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -231,7 +231,7 @@ mod tests { }; use datafusion_execution::cache::CacheAccessor; use datafusion_execution::cache::cache_manager::CacheManagerConfig; - use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; + use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use glob::Pattern; diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 903548dda3c3e..bf97a6b8f4b8d 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -101,7 +101,7 @@ use datafusion_session::SessionStore; use async_trait::async_trait; use chrono::{DateTime, Utc}; -use datafusion_execution::cache::cache_unit::DEFAULT_FILE_STATISTICS_MEMORY_LIMIT; +use datafusion_execution::cache::file_statistics_cache::DEFAULT_FILE_STATISTICS_MEMORY_LIMIT; use object_store::ObjectStore; use parking_lot::RwLock; use url::Url; diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index da89b89cee116..4dcdf543b929b 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -31,7 +31,7 @@ use datafusion_common::DFSchema; use 
datafusion_common::stats::Precision; use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; -use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; +use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::{Expr, col, lit}; diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 5998148c42d0b..407d7f95106bb 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -25,7 +25,7 @@ use datafusion::execution::context::TaskContext; use datafusion::prelude::SessionConfig; use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; -use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; +use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_physical_plan::common::collect; diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 89c9d6a314be4..d8f46ff40ee4f 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -17,7 +17,7 @@ use crate::cache::CacheAccessor; use crate::cache::DefaultListFilesCache; -use crate::cache::cache_unit::{ +use crate::cache::file_statistics_cache::{ DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, DefaultFileStatisticsCache, DefaultFilesMetadataCache, }; diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/file_statistics_cache.rs similarity index 100% rename from datafusion/execution/src/cache/cache_unit.rs rename to datafusion/execution/src/cache/file_statistics_cache.rs diff 
--git a/datafusion/execution/src/cache/mod.rs b/datafusion/execution/src/cache/mod.rs index 0380e50c0935c..76bd660e6c7d5 100644 --- a/datafusion/execution/src/cache/mod.rs +++ b/datafusion/execution/src/cache/mod.rs @@ -16,7 +16,7 @@ // under the License. pub mod cache_manager; -pub mod cache_unit; +pub mod file_statistics_cache; pub mod lru_queue; mod file_metadata_cache; From fee757dd0da951b41cf1a6ad2b674e3160387620 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 13:01:12 +0200 Subject: [PATCH 39/70] Simplify heap size accounting --- datafusion/execution/src/cache/file_statistics_cache.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index d008a626170a4..f0529579db2ee 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -111,11 +111,10 @@ impl DefaultFileStatisticsCacheState { let old_value = self.lru_queue.put(key.clone(), value); self.memory_used += entry_size; + self.memory_used += key.path.as_ref().heap_size(); if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); - } else { - self.memory_used += key.path.as_ref().heap_size(); } self.evict_entries(); From ddfa18f474702dc2d41823569310a9dde07f4526 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 10 Apr 2026 12:18:14 +0200 Subject: [PATCH 40/70] Adapt comments in test --- datafusion-cli/src/main.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index a2ec8445e9437..6e99969aa763e 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -653,8 +653,6 @@ mod tests { Ok(()) } - /// Shows that the statistics cache is not enabled by default yet - /// See https://github.com/apache/datafusion/issues/19217 #[tokio::test] async fn test_statistics_cache_default() -> Result<(), 
DataFusionError> { let ctx = SessionContext::new(); @@ -684,8 +682,6 @@ mod tests { .await?; } - // When the cache manager creates a StatisticsCache by default, - // the contents will show up here let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; From 578e8f43ae329ae5cbe08b22e085a51d797b0d36 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 10 Apr 2026 15:06:30 +0200 Subject: [PATCH 41/70] Separate drop table clean-ups --- datafusion/core/src/execution/context/mod.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index bf97a6b8f4b8d..0a089c611825a 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1425,17 +1425,16 @@ impl SessionContext { && table_provider.table_type() == table_type { schema.deregister_table(&table)?; - if table_type == TableType::Base - && let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() - && let Some(fsc) = - self.runtime_env().cache_manager.get_file_statistic_cache() - { - lfc.drop_table_entries(&Some(table_ref.clone()))?; - fsc.drop_table_entries(&Some(table_ref.clone()))?; + if table_type == TableType::Base { + if let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() { + lfc.drop_table_entries(&Some(table_ref.clone()))?; + } + if let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() { + fsc.drop_table_entries(&Some(table_ref.clone()))?; + } + return Ok(true); } - return Ok(true); } - Ok(false) } From 88084319df4d57186f9cec46fe0078d8157852b0 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 10 Apr 2026 20:10:15 +0200 Subject: [PATCH 42/70] fixup!
Separate drop table clean-ups --- datafusion/core/src/execution/context/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 0a089c611825a..f52de52f61f31 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1426,10 +1426,13 @@ impl SessionContext { { schema.deregister_table(&table)?; if table_type == TableType::Base { - if let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() { + if let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() + { lfc.drop_table_entries(&Some(table_ref.clone()))?; } - if let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() { + if let Some(fsc) = + self.runtime_env().cache_manager.get_file_statistic_cache() + { fsc.drop_table_entries(&Some(table_ref.clone()))?; } return Ok(true); From 4360af75e6f497b0832a20f2d023318fa47fb480 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 10:33:46 +0200 Subject: [PATCH 43/70] Increase default limit to 10 mb --- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index f0529579db2ee..8c45763c12538 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -71,7 +71,7 @@ struct DefaultFileStatisticsCacheState { memory_used: usize, } -pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 1024 * 1024; // 1MiB +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 10 * 1024 * 1024; // 10MiB impl Default for DefaultFileStatisticsCacheState { fn default() -> Self { From 3b9a0576fbfd433d5fd27d5fdf28ba054cb63bb6 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 10:49:35 +0200
Subject: [PATCH 44/70] Increase default limit to 20 mb --- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 8c45763c12538..46f08ebf884e1 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -71,7 +71,7 @@ struct DefaultFileStatisticsCacheState { memory_used: usize, } -pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 10 * 1024 * 1024; // 10MiB +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 20 * 1024 * 1024; // 10MiB impl Default for DefaultFileStatisticsCacheState { fn default() -> Self { From 272c2a0802078c15c246ba3570ca26292ca29633 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 11:41:30 +0200 Subject: [PATCH 45/70] Fix comment --- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 46f08ebf884e1..4d3f87724b941 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -71,7 +71,7 @@ struct DefaultFileStatisticsCacheState { memory_used: usize, } -pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 20 * 1024 * 1024; // 10MiB +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 20 * 1024 * 1024; // 20MiB impl Default for DefaultFileStatisticsCacheState { fn default() -> Self { From 44a716e19806c6d14435eec3b104ac899ddce84a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 11:59:10 +0200 Subject: [PATCH 46/70] Fix deregister logic --- datafusion/core/src/execution/context/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index f52de52f61f31..ceaa19042e764 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1435,8 +1435,8 @@ impl SessionContext { { fsc.drop_table_entries(&Some(table_ref.clone()))?; } - return Ok(true); } + return Ok(true); } Ok(false) } From 23014c4f44e6d1e4000b12bdb4cd17a6fb0cd994 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 12:49:48 +0200 Subject: [PATCH 47/70] Fix slt --- datafusion/sqllogictest/test_files/information_schema.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b4faa414e3acb..b7593e13c7296 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -331,8 +331,8 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.subset_repartition_threshold 4 datafusion.optimizer.top_down_join_key_reordering true -datafusion.runtime.file_statistics_cache_limit 1M datafusion.optimizer.use_statistics_registry false +datafusion.runtime.file_statistics_cache_limit 20M datafusion.runtime.list_files_cache_limit 1M datafusion.runtime.list_files_cache_ttl NULL datafusion.runtime.max_temp_directory_size 100G @@ -479,8 +479,8 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.subset_repartition_threshold 4 Partition count threshold for subset satisfaction optimization. 
When the current partition count is >= this threshold, DataFusion will skip repartitioning if the required partitioning expression is a subset of the current partition expression such as Hash(a) satisfies Hash(a, b). When the current partition count is < this threshold, DataFusion will repartition to increase parallelism even when subset satisfaction applies. Set to 0 to always repartition (disable subset satisfaction optimization). Set to a high value to always use subset satisfaction. Example (subset_repartition_threshold = 4): ```text Hash([a]) satisfies Hash([a, b]) because (Hash([a, b]) is subset of Hash([a]) If current partitions (3) < threshold (4), repartition: AggregateExec: mode=FinalPartitioned, gby=[a, b], aggr=[SUM(x)] RepartitionExec: partitioning=Hash([a, b], 8), input_partitions=3 AggregateExec: mode=Partial, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 3) If current partitions (8) >= threshold (4), use subset satisfaction: AggregateExec: mode=SinglePartitioned, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 8) ``` datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys -datafusion.runtime.file_statistics_cache_limit 1M Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.optimizer.use_statistics_registry false When set to true, the physical plan optimizer uses the pluggable `StatisticsRegistry` for statistics propagation across operators. This enables more accurate cardinality estimates compared to each operator's built-in `partition_statistics`. +datafusion.runtime.file_statistics_cache_limit 20M Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. 
datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.runtime.list_files_cache_ttl NULL TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. datafusion.runtime.max_temp_directory_size 100G Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. From 7154c1cec1dfed261e4a01c1bdef897b119f54e4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 14:07:04 +0200 Subject: [PATCH 48/70] Add table reference to FileStatisticsCacheEntry --- datafusion/execution/src/cache/cache_manager.rs | 2 ++ .../execution/src/cache/file_statistics_cache.rs | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index d8f46ff40ee4f..0f80ae1ad1371 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -137,6 +137,8 @@ pub struct FileStatisticsCacheEntry { pub statistics_size_bytes: usize, /// Whether ordering information is cached for this file. pub has_ordering: bool, + /// Reference to the table associated with this statistics entry. + pub table_reference: Option } /// Cached file listing. 
diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 4d3f87724b941..142f269ff4c66 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -115,6 +115,7 @@ impl DefaultFileStatisticsCacheState { if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); + self.memory_used -= key.path.as_ref().heap_size(); } self.evict_entries(); @@ -234,6 +235,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { table_size_bytes: cached.statistics.total_byte_size, statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), + table_reference: path.table }, ); } @@ -595,6 +597,7 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: false, + table_reference: None, } ), ( @@ -606,6 +609,7 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: true, + table_reference: None, } ), ]) @@ -665,6 +669,12 @@ mod tests { assert_eq!(result_2.unwrap(), value_2); assert_eq!(result_3.unwrap(), value_3); + // add the third entry again, making sure memory usage remains the same + cache.put(&path_3, value_3.clone()); + assert_eq!(cache.memory_used(), limit_for_2_entries); + cache.put(&path_3, value_3.clone()); + assert_eq!(cache.memory_used(), limit_for_2_entries); + cache.remove(&path_2); assert_eq!(cache.len(), 1); assert_eq!( From 75dbe20a365e396813063c014580d7103f6e2622 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 14:13:52 +0200 Subject: [PATCH 49/70] fixup! 
Add table reference to FileStatisticsCacheEntry --- datafusion/execution/src/cache/cache_manager.rs | 2 +- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 0f80ae1ad1371..5ad5ea0293073 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -138,7 +138,7 @@ pub struct FileStatisticsCacheEntry { /// Whether ordering information is cached for this file. pub has_ordering: bool, /// Reference to the table associated with this statistics entry. - pub table_reference: Option + pub table_reference: Option, } /// Cached file listing. diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 142f269ff4c66..1faeff4fa7a93 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -235,7 +235,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { table_size_bytes: cached.statistics.total_byte_size, statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), - table_reference: path.table + table_reference: path.table, }, ); } From c4f7c0e999f6b3da4e93a1c5e6922855d1bea771 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 22:54:53 +0200 Subject: [PATCH 50/70] Fix comment --- datafusion/execution/src/cache/cache_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 5ad5ea0293073..066876945f995 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -458,7 +458,7 @@ pub struct CacheManagerConfig { /// Enabling the cache avoids repeatedly reading file 
statistics in a DataFusion session. /// Default is enabled with 1MiB. Currently only Parquet files are supported. pub file_statistics_cache: Option>, - /// Limit of the file statistics cache, in bytes. Default: 1MiB. + /// Limit of the file statistics cache, in bytes. Default: 20MiB. pub file_statistics_cache_limit: usize, /// Enable caching of file metadata when listing files. /// Enabling the cache avoids repeat list and object metadata fetch operations, which may be From 81b42fbe0dbaeff0c3dfd9a954faf0760975d015 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 20 Apr 2026 01:53:41 +0200 Subject: [PATCH 51/70] Fix runtime_env entry --- datafusion/execution/src/runtime_env.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index e393a7a127873..0fbfc26d6d6b8 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -535,7 +535,7 @@ impl RuntimeEnvBuilder { Some("50M".to_owned()), Some("1M".to_owned()), None, - Some("1M".to_owned()), + Some("20M".to_owned()), ) } From e8e9bf26f86f34208c8e00ef224a895a5dda11b7 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 21 Apr 2026 10:38:46 +0200 Subject: [PATCH 52/70] Add cache for all benchmark runs --- benchmarks/src/bin/external_aggr.rs | 4 +++- benchmarks/src/imdb/run.rs | 4 +++- benchmarks/src/sort_pushdown.rs | 4 +++- benchmarks/src/sort_tpch.rs | 4 +++- benchmarks/src/tpcds/run.rs | 4 +++- benchmarks/src/tpch/run.rs | 4 +++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/benchmarks/src/bin/external_aggr.rs b/benchmarks/src/bin/external_aggr.rs index ee604ec7365a1..a6e322c7fabc0 100644 --- a/benchmarks/src/bin/external_aggr.rs +++ b/benchmarks/src/bin/external_aggr.rs @@ -326,7 +326,9 @@ impl ExternalAggrConfig { let config = ListingTableConfig::new(table_path).with_listing_options(options); let config = config.infer_schema(&state).await?; - 
Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index ca9710a920517..6d3b5c6bafb40 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -470,7 +470,9 @@ impl RunOpt { _ => unreachable!(), }; - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/sort_pushdown.rs b/benchmarks/src/sort_pushdown.rs index e7fce1921e7a8..8e34706ac140a 100644 --- a/benchmarks/src/sort_pushdown.rs +++ b/benchmarks/src/sort_pushdown.rs @@ -273,7 +273,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/sort_tpch.rs b/benchmarks/src/sort_tpch.rs index 95c90d826de20..206911c45adde 100644 --- a/benchmarks/src/sort_tpch.rs +++ b/benchmarks/src/sort_tpch.rs @@ -351,7 +351,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/tpcds/run.rs b/benchmarks/src/tpcds/run.rs index f7ef6991515da..58821340034da 100644 --- a/benchmarks/src/tpcds/run.rs +++ b/benchmarks/src/tpcds/run.rs @@ -347,7 +347,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + 
ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index ec7aa8c554a28..75983ee141d93 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -342,7 +342,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { From eaff595899b727888326af609026f7580f6c9c6d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 21 Apr 2026 12:58:18 +0200 Subject: [PATCH 53/70] Add cache to listing table creation --- datafusion/core/tests/parquet/file_statistics.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 4dcdf543b929b..98ffa119d5a3d 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -30,7 +30,7 @@ use datafusion::prelude::SessionContext; use datafusion_common::DFSchema; use datafusion_common::stats::Precision; use datafusion_execution::cache::DefaultListFilesCache; -use datafusion_execution::cache::cache_manager::CacheManagerConfig; +use datafusion_execution::cache::cache_manager::{CacheManagerConfig, FileStatisticsCache}; use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; @@ -236,7 +236,7 @@ async fn list_files_with_session_level_cache() { async fn get_listing_table( table_path: &ListingTableUrl, - static_cache: Option>, + static_cache: Option>, opt: &ListingOptions, ) -> ListingTable { let schema = opt @@ -249,12 +249,7 @@ async fn get_listing_table( let config1 = 
ListingTableConfig::new(table_path.clone()) .with_listing_options(opt.clone()) .with_schema(schema); - let table = ListingTable::try_new(config1).unwrap(); - if let Some(c) = static_cache { - table.with_cache(Some(c)) - } else { - table - } + ListingTable::try_new(config1).unwrap().with_cache(static_cache) } fn get_cache_runtime_state() -> ( From f9dd8b3331d4ca15dad01f745d958ff91589b32c Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 21 Apr 2026 20:19:14 +0200 Subject: [PATCH 54/70] fixup! Add cache to listing table creation --- datafusion/core/tests/parquet/file_statistics.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 98ffa119d5a3d..3e3b90a348b04 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -30,7 +30,9 @@ use datafusion::prelude::SessionContext; use datafusion_common::DFSchema; use datafusion_common::stats::Precision; use datafusion_execution::cache::DefaultListFilesCache; -use datafusion_execution::cache::cache_manager::{CacheManagerConfig, FileStatisticsCache}; +use datafusion_execution::cache::cache_manager::{ + CacheManagerConfig, FileStatisticsCache, +}; use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; @@ -249,7 +251,9 @@ async fn get_listing_table( let config1 = ListingTableConfig::new(table_path.clone()) .with_listing_options(opt.clone()) .with_schema(schema); - ListingTable::try_new(config1).unwrap().with_cache(static_cache) + ListingTable::try_new(config1) + .unwrap() + .with_cache(static_cache) } fn get_cache_runtime_state() -> ( From bdc98da1b786e7c4a8aea6b83670304870805e2d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 07:04:50 +0200 Subject: [PATCH 55/70] Adapt limit to 20M 
in configs.md --- docs/source/user-guide/configs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6a8014ddf1d8f..4a3f3cc6d7bc9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -230,8 +230,8 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: | key | default | description | -| ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| datafusion.runtime.file_statistics_cache_limit | 1M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| ---------------------------------------------- |---------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| datafusion.runtime.file_statistics_cache_limit | 20M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | | datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. 
| From 875fced692f9f6f70c4681d7880ac8048890d6e0 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 07:20:17 +0200 Subject: [PATCH 56/70] fixup! Adapt limit to 20M in configs.md --- docs/source/user-guide/configs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 4a3f3cc6d7bc9..8b486bcf6cdc9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -230,7 +230,7 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: | key | default | description | -| ---------------------------------------------- |---------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | datafusion.runtime.file_statistics_cache_limit | 20M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. 
| From 01de9f63240c93307faab7c598c2e54e8dc5f025 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 09:23:17 +0200 Subject: [PATCH 57/70] Fix linter --- datafusion/catalog-listing/src/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index c22beba295ed7..ab07e2eabe786 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -367,7 +367,7 @@ fn try_into_partitioned_file( let mut pf: PartitionedFile = object_meta.into(); pf.partition_values = partition_values; - pf.table_reference = table_path.get_table_ref().clone(); + pf.table_reference.clone_from(table_path.get_table_ref()); Ok(Some(pf)) } From 2701a8cd48ff45bc2c266c3e5c901e684ca983f4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 09:24:12 +0200 Subject: [PATCH 58/70] Add cache to listing table in _read_type() --- datafusion/core/src/execution/context/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index ceaa19042e764..d0b8f368cdfb3 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1671,7 +1671,8 @@ impl SessionContext { let config = ListingTableConfig::new_with_multi_paths(table_paths) .with_listing_options(listing_options) .with_schema(resolved_schema); - let provider = ListingTable::try_new(config)?; + let provider = ListingTable::try_new(config)? 
+ .with_cache(self.runtime_env().cache_manager.get_file_statistic_cache()); self.read_table(Arc::new(provider)) } From a3b8132760629045cc82e014beff411b9af7e97e Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 13:07:55 +0200 Subject: [PATCH 59/70] Add ListView and LargeListView to heapsize --- datafusion/common/src/heap_size.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 1acc3486eb51c..78cba7bd41e26 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -17,9 +17,7 @@ use crate::stats::Precision; use crate::{ColumnStatistics, ScalarValue, Statistics}; -use arrow::array::{ - Array, FixedSizeListArray, LargeListArray, ListArray, MapArray, StructArray, -}; +use arrow::array::{Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, MapArray, StructArray}; use arrow::datatypes::{ DataType, Field, Fields, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, TimeUnit, UnionFields, UnionMode, i256, @@ -122,6 +120,8 @@ impl DFHeapSize for ScalarValue { Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), Dictionary(a, b) => a.heap_size() + b.heap_size(), RunEndEncoded(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + ListView(a) => a.heap_size(), + LargeListView(a) => a.heap_size(), } } } @@ -258,12 +258,24 @@ impl DFHeapSize for LargeListArray { } } +impl DFHeapSize for LargeListViewArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + impl DFHeapSize for ListArray { fn heap_size(&self) -> usize { self.get_array_memory_size() } } +impl DFHeapSize for ListViewArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + impl DFHeapSize for FixedSizeListArray { fn heap_size(&self) -> usize { self.get_array_memory_size() From 2cfeaecf237a49044a10ea9197b6430674130bcf Mon Sep 17 00:00:00 2001 From: Michael 
Kleen Date: Wed, 22 Apr 2026 13:14:22 +0200 Subject: [PATCH 60/70] fixup! Add ListView and LargeListView to heapsize --- datafusion/common/src/heap_size.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 78cba7bd41e26..b744225681450 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -17,7 +17,10 @@ use crate::stats::Precision; use crate::{ColumnStatistics, ScalarValue, Statistics}; -use arrow::array::{Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, MapArray, StructArray}; +use arrow::array::{ + Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, + ListViewArray, MapArray, StructArray, +}; use arrow::datatypes::{ DataType, Field, Fields, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, TimeUnit, UnionFields, UnionMode, i256, From 16488a1310dc31f2269e5ac6a8e87b187e236abb Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 14:31:19 +0200 Subject: [PATCH 61/70] Remove array.slt --- datafusion/sqllogictest/test_files/array.slt | 9945 ------------------ 1 file changed, 9945 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/array.slt diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt deleted file mode 100644 index 81d5c8f91a5bc..0000000000000 --- a/datafusion/sqllogictest/test_files/array.slt +++ /dev/null @@ -1,9945 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############# -## Array Expressions Tests -############# - -### Tables - -statement ok -CREATE TABLE values( - a INT, - b INT, - c INT, - d FLOAT, - e VARCHAR, - f VARCHAR -) AS VALUES - (1, 1, 2, 1.1, 'Lorem', 'A'), - (2, 3, 4, 2.2, 'ipsum', ''), - (3, 5, 6, 3.3, 'dolor', 'BB'), - (4, 7, 8, 4.4, 'sit', NULL), - (NULL, 9, 10, 5.5, 'amet', 'CCC'), - (5, NULL, 12, 6.6, ',', 'DD'), - (6, 11, NULL, 7.7, 'consectetur', 'E'), - (7, 13, 14, NULL, 'adipiscing', 'F'), - (8, 15, 16, 8.8, NULL, '') -; - -statement ok -CREATE TABLE values_without_nulls -AS VALUES - (1, 1, 2, 1.1, 'Lorem', 'A'), - (2, 3, 4, 2.2, 'ipsum', ''), - (3, 5, 6, 3.3, 'dolor', 'BB'), - (4, 7, 8, 4.4, 'sit', NULL), - (5, 9, 10, 5.5, 'amet', 'CCC'), - (6, 11, 12, 6.6, ',', 'DD'), - (7, 13, 14, 7.7, 'consectetur', 'E'), - (8, 15, 16, 8.8, 'adipiscing', 'F'), - (9, 17, 18, 9.9, 'elit', '') -; - -statement ok -CREATE TABLE arrays -AS VALUES - (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')), - (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')), - (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')), - (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')), - (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')), - (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')), - (make_array(make_array(15, 16),make_array(NULL, 18)), 
make_array(16.6, 17.7, 18.8), NULL) -; - -statement ok -CREATE TABLE large_arrays -AS - SELECT - arrow_cast(column1, 'LargeList(List(Int64))') AS column1, - arrow_cast(column2, 'LargeList(Float64)') AS column2, - arrow_cast(column3, 'LargeList(Utf8)') AS column3 - FROM arrays -; - -statement ok -CREATE TABLE fixed_size_arrays -AS VALUES - (arrow_cast(make_array(make_array(NULL, 2),make_array(3, NULL)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('L', 'o', 'r', 'e', 'm'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(3, 4),make_array(5, 6)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(NULL, 5.5, 6.6), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('i', 'p', NULL, 'u', 'm'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(5, 6),make_array(7, 8)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(7.7, 8.8, 9.9), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('d', NULL, 'l', 'o', 'r'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(10.1, NULL, 12.2), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('s', 'i', 't', 'a', 'b'), 'FixedSizeList(5, Utf8)')), - (NULL, arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), NULL, arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), NULL) -; - -statement ok -CREATE TABLE slices -AS VALUES - (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1), - (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 2, -4), - 
(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 0, 0), - (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), -4, -7), - (NULL, 4, 5), - (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), - (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL) -; - -statement ok -CREATE TABLE fixed_slices -AS VALUES - (arrow_cast(make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1), - (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 'FixedSizeList(10, Int64)'), 2, -4), - (arrow_cast(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 0, 0), - (arrow_cast(make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), -4, -7), - (arrow_cast(make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), 'FixedSizeList(10, Int64)'), NULL, 6), - (arrow_cast(make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60),'FixedSizeList(10, Int64)'), 5, NULL) -; - -statement ok -CREATE TABLE arrayspop -AS VALUES - (make_array(1, 2, NULL)), - (make_array(3, 4, 5, NULL)), - (make_array(6, 7, 8, NULL, 9)), - (make_array(NULL, NULL, 100)), - (NULL), - (make_array(NULL, 10, 11, 12)) -; - -statement ok -CREATE TABLE large_arrayspop -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1 -FROM arrayspop -; - -statement ok -CREATE TABLE nested_arrays -AS VALUES - (make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), make_array(7, 8, 9), 2, make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), make_array(11, 12, 13)), - (make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), make_array(10, 11, 12), 3, make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), make_array(121, 131, 141)) -; - -statement ok -CREATE TABLE large_nested_arrays -AS - SELECT - arrow_cast(column1, 
'LargeList(LargeList(Int64))') AS column1, - arrow_cast(column2, 'LargeList(Int64)') AS column2, - column3, - arrow_cast(column4, 'LargeList(LargeList(List(Int64)))') AS column4, - arrow_cast(column5, 'LargeList(Int64)') AS column5 - FROM nested_arrays -; - -statement ok -CREATE TABLE fixed_size_nested_arrays -AS VALUES - (arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(7, 8, 9), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(11, 12, 13), 'FixedSizeList(3, Int64)')), - (arrow_cast(make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(10, 11, 12), 'FixedSizeList(3, Int64)'), 3, arrow_cast(make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(121, 131, 141), 'FixedSizeList(3, Int64)')) -; - -statement ok -CREATE TABLE arrays_values -AS VALUES - (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), - (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'), - (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'), - (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'), - (NULL, 44, 5, '@'), - (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'), - (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'), - (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL) -; - -statement ok -CREATE TABLE large_arrays_values -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - column2, - column3, - column4 -FROM arrays_values -; - -statement ok -CREATE TABLE fixed_arrays_values -AS SELECT - 
arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1, - column2, - column3, - column4 -FROM arrays_values -; - -statement ok -CREATE TABLE arrays_values_v2 -AS VALUES - (make_array(NULL, 2, 3), make_array(4, 5, NULL), 12, make_array([30, 40, 50])), - (NULL, make_array(7, NULL, 8), 13, make_array(make_array(NULL,NULL,60))), - (make_array(9, NULL, 10), NULL, 14, make_array(make_array(70,NULL,NULL))), - (make_array(NULL, 1), make_array(NULL, 21), NULL, NULL), - (make_array(11, 12), NULL, NULL, NULL), - (NULL, NULL, NULL, NULL) -; - -statement ok -CREATE TABLE large_arrays_values_v2 -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - arrow_cast(column2, 'LargeList(Int64)') AS column2, - column3, - arrow_cast(column4, 'LargeList(LargeList(Int64))') AS column4 -FROM arrays_values_v2 -; - -statement ok -CREATE TABLE flatten_table -AS VALUES - (make_array([1], [2], [3]), make_array([[1, 2, 3]], [[4, 5]], [[6]]), make_array([[[1]]], [[[2, 3]]]), make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4])), - (make_array([1, 2], [3, 4], [5, 6]), make_array([[8]]), make_array([[[1,2]]], [[[3]]]), make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0])) -; - -statement ok -CREATE TABLE large_flatten_table -AS - SELECT - arrow_cast(column1, 'LargeList(LargeList(Int64))') AS column1, - arrow_cast(column2, 'LargeList(LargeList(LargeList(Int64)))') AS column2, - arrow_cast(column3, 'LargeList(LargeList(LargeList(LargeList(Int64))))') AS column3, - arrow_cast(column4, 'LargeList(LargeList(Float64))') AS column4 - FROM flatten_table -; - -statement ok -CREATE TABLE fixed_size_flatten_table -AS VALUES - (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), - arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'), - arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), - arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))') - ), - ( - 
arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), - arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'), - arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), - arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))') - ) -; - -statement ok -CREATE TABLE array_has_table_1D -AS VALUES - (make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)), - (make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5)) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D -AS VALUES - (arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2, 4, 6, 8, 1, 3, 5), 'FixedSizeList(7, Int64)')), - (arrow_cast(make_array(3, 4, 5), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(2,5), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3, 5, 7, 9, 11, 13), 'FixedSizeList(7, Int64)')) -; - -statement ok -CREATE TABLE array_has_table_1D_Float -AS VALUES - (make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), - (make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D_Float -AS VALUES - (arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 1.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(1.0,3.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 2.22), 
'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 3.33), 'FixedSizeList(2, Float64)')), - (arrow_cast(make_array(3.0, 4.0, 5.0), 'FixedSizeList(3, Float64)'), 2.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(2.0,5.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 1.11), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 3.33), 'FixedSizeList(2, Float64)')) -; - -statement ok -CREATE TABLE array_has_table_1D_Boolean -AS VALUES - (make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), - (make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D_Boolean -AS VALUES - (arrow_cast(make_array(true, true, true), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, true, false, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(false, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)')), - (arrow_cast(make_array(false, false, false), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, false, true, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, true, false), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(true, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(false,false,true), 'FixedSizeList(3, Boolean)')) -; - -statement ok -CREATE TABLE array_has_table_1D_UTF8 -AS VALUES - (make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')), - (make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), 
make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D_UTF8 -AS VALUES - (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'bc', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'datafusion', 'rust'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('data', 'fusion', 'rust'), 'FixedSizeList(3, Utf8)')), - (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'defg', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow', 'python'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)')) -; - -statement ok -CREATE TABLE array_has_table_2D -AS VALUES - (make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), - (make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_2D -AS VALUES - (arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3], [4,5], [6,7]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([4,5], [6,7], [1,2,3]), 'FixedSizeList(3, List(Int64))')), - (arrow_cast(make_array([3,4], [5]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(5, 3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3,4], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([1,2,3], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))')) -; - -statement ok -CREATE 
TABLE array_has_table_2D_float -AS VALUES - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_2D_Float -AS VALUES - (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.1, 2.2], [3.3], [4.4]), 'FixedSizeList(3, List(Float64))')), - (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))')) -; - -statement ok -CREATE TABLE array_has_table_3D -AS VALUES - (make_array([[1,2]]), make_array([1])), - (make_array([[1,2]]), make_array([1,2])), - (make_array([[1,2]]), make_array([1,2,3])), - (make_array([[1], [2]]), make_array([2])), - (make_array([[1], [2]]), make_array([1], [2])), - (make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])), - (make_array([[1], [2]], [[2], [3]]), make_array([1], [2])) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_3D -AS VALUES - (arrow_cast(make_array([[1,2]], [[3, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2], [3, 4]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2,3], [1]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, 
List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) -; - -statement ok -CREATE TABLE array_has_table_null -AS VALUES - (make_array(1, 2), 1), - (make_array(1, NULL), 1), - (make_array(3, 4, 5), 2), - (make_array(3, NULL, 5), 2), - (make_array(NULL, NULL, NULL), 2) -; - -statement ok -CREATE TABLE array_has_table_empty -AS VALUES - (make_array(1, 3, 5), 1), - (make_array(), 1), - (NULL, 1) -; - -statement ok -CREATE TABLE array_distinct_table_1D -AS VALUES - (make_array(1, 1, 2, 2, 3)), - (make_array(1, 2, 3, 4, 5)), - (make_array(3, 5, 3, 3, 3)) -; - -statement ok -CREATE TABLE array_distinct_table_1D_UTF8 -AS VALUES - (make_array('a', 'a', 'bc', 'bc', 'def')), - (make_array('a', 'bc', 'def', 'defg', 'defg')), - (make_array('defg', 'defg', 'defg', 'defg', 'defg')) -; - -statement ok -CREATE TABLE array_distinct_table_2D -AS VALUES - (make_array([1,2], [1,2], [3,4], [3,4], [5,6])), - (make_array([1,2], [3,4], [5,6], [7,8], [9,10])), - (make_array([5,6], [5,6], NULL)) -; - -statement ok -CREATE TABLE array_distinct_table_1D_large -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1 -FROM array_distinct_table_1D -; - -statement ok -CREATE TABLE array_distinct_table_1D_fixed -AS SELECT - arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 -FROM array_distinct_table_1D -; - -statement ok -CREATE TABLE array_distinct_table_1D_UTF8_fixed -AS SELECT - arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 -FROM array_distinct_table_1D_UTF8 -; - -statement ok -CREATE TABLE array_distinct_table_2D_fixed -AS VALUES - (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), - (arrow_cast(make_array([1,2], [3,4], [5,6], [7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), - (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 
'FixedSizeList(5, List(Int64))')) -; - -statement ok -CREATE TABLE array_intersect_table_1D -AS VALUES - (make_array(1, 2), make_array(1), make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3)), - (make_array(11, 22), make_array(11), make_array(11,22,33), make_array(11,33), make_array(11,33,55), make_array(22,44,66,88,11,33)) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D -AS - SELECT - arrow_cast(column1, 'LargeList(Int64)') as column1, - arrow_cast(column2, 'LargeList(Int64)') as column2, - arrow_cast(column3, 'LargeList(Int64)') as column3, - arrow_cast(column4, 'LargeList(Int64)') as column4, - arrow_cast(column5, 'LargeList(Int64)') as column5, - arrow_cast(column6, 'LargeList(Int64)') as column6 -FROM array_intersect_table_1D -; - -statement ok -CREATE TABLE array_intersect_table_1D_Float -AS VALUES - (make_array(1.0, 2.0), make_array(1.0), make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), - (make_array(3.0, 4.0, 5.0), make_array(2.0), make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D_Float -AS - SELECT - arrow_cast(column1, 'LargeList(Float64)') as column1, - arrow_cast(column2, 'LargeList(Float64)') as column2, - arrow_cast(column3, 'LargeList(Float64)') as column3, - arrow_cast(column4, 'LargeList(Float64)') as column4, - arrow_cast(column5, 'LargeList(Float64)') as column5, - arrow_cast(column6, 'LargeList(Float64)') as column6 -FROM array_intersect_table_1D_Float -; - -statement ok -CREATE TABLE array_intersect_table_1D_Boolean -AS VALUES - (make_array(true, true, true), make_array(false), make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), - (make_array(false, false, false), make_array(false), make_array(true, false, true), make_array(true, true), make_array(true, true), 
make_array(false,false,true)) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D_Boolean -AS - SELECT - arrow_cast(column1, 'LargeList(Boolean)') as column1, - arrow_cast(column2, 'LargeList(Boolean)') as column2, - arrow_cast(column3, 'LargeList(Boolean)') as column3, - arrow_cast(column4, 'LargeList(Boolean)') as column4, - arrow_cast(column5, 'LargeList(Boolean)') as column5, - arrow_cast(column6, 'LargeList(Boolean)') as column6 -FROM array_intersect_table_1D_Boolean -; - -statement ok -CREATE TABLE array_intersect_table_1D_UTF8 -AS VALUES - (make_array('a', 'bc', 'def'), make_array('bc'), make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')), - (make_array('a', 'bc', 'def'), make_array('defg'), make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D_UTF8 -AS - SELECT - arrow_cast(column1, 'LargeList(Utf8)') as column1, - arrow_cast(column2, 'LargeList(Utf8)') as column2, - arrow_cast(column3, 'LargeList(Utf8)') as column3, - arrow_cast(column4, 'LargeList(Utf8)') as column4, - arrow_cast(column5, 'LargeList(Utf8)') as column5, - arrow_cast(column6, 'LargeList(Utf8)') as column6 -FROM array_intersect_table_1D_UTF8 -; - -statement ok -CREATE TABLE array_intersect_table_1D_NULL -AS VALUES - ([1, 2, 2, 3], [2, 3, 4]), - ([2, 3, 3], [3]), - ([3], [3, 3, 4]), - (null, [3, 4]), - ([1, 2], null), - (null, null) -; - -statement ok -CREATE TABLE array_intersect_table_2D -AS VALUES - (make_array([1,2]), make_array([1,3]), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), - (make_array([3,4], [5]), make_array([3,4]), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) -; - -statement ok -CREATE TABLE large_array_intersect_table_2D -AS - SELECT - arrow_cast(column1, 
'LargeList(List(Int64))') as column1, - arrow_cast(column2, 'LargeList(List(Int64))') as column2, - arrow_cast(column3, 'LargeList(List(Int64))') as column3, - arrow_cast(column4, 'LargeList(List(Int64))') as column4 -FROM array_intersect_table_2D -; - -statement ok -CREATE TABLE array_intersect_table_2D_float -AS VALUES - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) -; - -statement ok -CREATE TABLE large_array_intersect_table_2D_Float -AS - SELECT - arrow_cast(column1, 'LargeList(List(Float64))') as column1, - arrow_cast(column2, 'LargeList(List(Float64))') as column2 -FROM array_intersect_table_2D_Float -; - -statement ok -CREATE TABLE array_intersect_table_3D -AS VALUES - (make_array([[1,2]]), make_array([[1]])), - (make_array([[1,2]]), make_array([[1,2]])) -; - -statement ok -CREATE TABLE large_array_intersect_table_3D -AS - SELECT - arrow_cast(column1, 'LargeList(List(List(Int64)))') as column1, - arrow_cast(column2, 'LargeList(List(List(Int64)))') as column2 -FROM array_intersect_table_3D -; - -statement ok -CREATE TABLE arrays_values_without_nulls -AS VALUES - (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ',', [2,3]), - (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.', [4,5]), - (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-', [6,7]), - (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9]) -; - -statement ok -CREATE TABLE large_arrays_values_without_nulls -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - column2, - column3, - column4, - arrow_cast(column5, 'LargeList(Int64)') AS column5 -FROM arrays_values_without_nulls -; - -statement ok -CREATE TABLE fixed_size_arrays_values_without_nulls -AS VALUES - (arrow_cast(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1, ',', [2,3]), - (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 
20), 'FixedSizeList(10, Int64)'), 12, 2, '.', [4,5]), - (arrow_cast(make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 23, 3, '-', [6,7]), - (arrow_cast(make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), 34, 4, 'ok', [8,9]) -; - -statement ok -CREATE TABLE arrays_range -AS VALUES - (3, 10, 2), - (4, 13, 3) -; - -statement ok -CREATE TABLE arrays_with_repeating_elements -AS VALUES - (make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 2, 4, 3), - (make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 4, 7, 2), - (make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 7, 10, 5), - (make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 10, 13, 10) -; - -statement ok -CREATE TABLE large_arrays_with_repeating_elements -AS - SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - column2, - column3, - column4 - FROM arrays_with_repeating_elements -; - -statement ok -CREATE TABLE fixed_arrays_with_repeating_elements -AS VALUES - (arrow_cast(make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 'FixedSizeList(10, Int64)'), 2, 4, 3), - (arrow_cast(make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 'FixedSizeList(10, Int64)'), 4, 7, 2), - (arrow_cast(make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 'FixedSizeList(10, Int64)'), 7, 10, 5), - (arrow_cast(make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 'FixedSizeList(10, Int64)'), 10, 13, 10) -; - -statement ok -CREATE TABLE nested_arrays_with_repeating_elements -AS VALUES - (make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), [4, 5, 6], [10, 11, 12], 3), - (make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), [10, 11, 12], [19, 20, 21], 2), - (make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), [19, 20, 21], [28, 29, 30], 5), - (make_array([28, 29, 30], [31, 
32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) -; - -statement ok -CREATE TABLE large_nested_arrays_with_repeating_elements -AS - SELECT - arrow_cast(column1, 'LargeList(List(Int64))') AS column1, - column2, - column3, - column4 - FROM nested_arrays_with_repeating_elements -; - -statement ok -CREATE TABLE fixed_size_nested_arrays_with_repeating_elements -AS VALUES - (arrow_cast(make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(10, List(Int64))'), [4, 5, 6], [10, 11, 12], 3), - (arrow_cast(make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), 'FixedSizeList(10, List(Int64))'), [10, 11, 12], [19, 20, 21], 2), - (arrow_cast(make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), 'FixedSizeList(10, List(Int64))'), [19, 20, 21], [28, 29, 30], 5), - (arrow_cast(make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), 'FixedSizeList(10, List(Int64))'), [28, 29, 30], [28, 29, 30], 10) -; - -statement ok -CREATE TABLE arrays_distance_table -AS VALUES - (make_array(1, 2, 3), make_array(1, 2, 3), make_array(1.1, 2.2, 3.3) , make_array(1.1, NULL, 3.3)), - (make_array(1, 2, 3), make_array(4, 5, 6), make_array(4.4, 5.5, 6.6), make_array(4.4, NULL, 6.6)), - (make_array(1, 2, 3), make_array(7, 8, 9), make_array(7.7, 8.8, 9.9), make_array(7.7, NULL, 9.9)), - (make_array(1, 2, 3), make_array(10, 11, 12), make_array(10.1, 11.2, 12.3), make_array(10.1, NULL, 12.3)) -; - -statement ok -CREATE TABLE large_arrays_distance_table -AS - SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - 
arrow_cast(column2, 'LargeList(Int64)') AS column2, - arrow_cast(column3, 'LargeList(Float64)') AS column3, - arrow_cast(column4, 'LargeList(Float64)') AS column4 -FROM arrays_distance_table -; - -statement ok -CREATE TABLE fixed_size_arrays_distance_table -AS - SELECT - arrow_cast(column1, 'FixedSizeList(3, Int64)') AS column1, - arrow_cast(column2, 'FixedSizeList(3, Int64)') AS column2, - arrow_cast(column3, 'FixedSizeList(3, Float64)') AS column3, - arrow_cast(column4, 'FixedSizeList(3, Float64)') AS column4 -FROM arrays_distance_table -; - - -# Array literal - -## boolean coercion is not supported -query error -select [1, true, null] - -## wrapped in array_length to get deterministic results -query I -SELECT array_length([now()]) ----- -1 - -## array literal with functions -query ? -select [abs(-1.2), sin(-1), log(2), ceil(3.141)] ----- -[1.2, -0.8414709848078965, 0.30102999566398114, 4.0] - -## array literal with nested types -query ??? -select - [struct('foo', 1)], - [struct('foo', [1,2,3])], - [struct('foo', [struct(3, 'x')])] -; ----- -[{c0: foo, c1: 1}] [{c0: foo, c1: [1, 2, 3]}] [{c0: foo, c1: [{c0: 3, c1: x}]}] - -query TTT -select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from arrays; ----- -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) - -# arrays table -query ??? 
-select column1, column2, column3 from arrays; ----- -[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] -[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] -NULL [13.3, 14.4, 15.5] [a, m, e, t] -[[11, 12], [13, 14]] NULL [,] -[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL - -# nested_arrays table -query ??I?? -select column1, column2, column3, column4, column5 from nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [7, 8, 9] 2 [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] [11, 12, 13] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [10, 11, 12] 3 [[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]] [121, 131, 141] - -# values table -query IIIRT -select a, b, c, d, e from values; ----- -1 1 2 1.1 Lorem -2 3 4 2.2 ipsum -3 5 6 3.3 dolor -4 7 8 4.4 sit -NULL 9 10 5.5 amet -5 NULL 12 6.6 , -6 11 NULL 7.7 consectetur -7 13 14 NULL adipiscing -8 15 16 8.8 NULL - -# arrays_values table -query ?IIT -select column1, column2, column3, column4 from arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 12 2 . -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 23 3 - -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] 34 4 ok -NULL 44 5 @ -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL - -# slices table -query ?II -select column1, column2, column3 from slices; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 2 -4 -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 0 0 -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -4 -7 -NULL 4 5 -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 5 NULL - -query ??I? 
-select column1, column2, column3, column4 from arrays_values_v2; ----- -[NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] -NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] -[9, NULL, 10] NULL 14 [[70, NULL, NULL]] -[NULL, 1] [NULL, 21] NULL NULL -[11, 12] NULL NULL NULL -NULL NULL NULL NULL - -# arrays_values_without_nulls table -query ?IIT -select column1, column2, column3, column4 from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 . -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 - -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ok - -# arrays_with_repeating_elements table -query ?III -select column1, column2, column3, column4 from arrays_with_repeating_elements; ----- -[1, 2, 1, 3, 2, 2, 1, 3, 2, 3] 2 4 3 -[4, 4, 5, 5, 6, 5, 5, 5, 4, 4] 4 7 2 -[7, 7, 7, 8, 7, 9, 7, 8, 7, 7] 7 10 5 -[10, 11, 12, 10, 11, 12, 10, 11, 12, 10] 10 13 10 - -# nested_arrays_with_repeating_elements table -query ???I -select column1, column2, column3, column4 from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [4, 5, 6] [10, 11, 12] 3 -[[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [10, 11, 12] [19, 20, 21] 2 -[[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [19, 20, 21] [28, 29, 30] 5 -[[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [28, 29, 30] [37, 38, 39] 10 - - -### Array index - - -## array[i] - -# single index with scalars #1 (positive index) -query IRT -select make_array(1, 2, 3)[1], make_array(1.0, 2.0, 3.0)[2], make_array('h', 'e', 'l', 'l', 'o')[3]; ----- -1 2 l - -# single index with scalars #2 (zero index) -query I -select make_array(1, 
2, 3)[0]; ----- -NULL - -# single index with scalars #3 (negative index) -query IRT -select make_array(1, 2, 3)[-1], make_array(1.0, 2.0, 3.0)[-2], make_array('h', 'e', 'l', 'l', 'o')[-3]; ----- -3 2 l - -# single index with scalars #4 (complex index) -query IRT -select make_array(1, 2, 3)[1 + 2 - 1], make_array(1.0, 2.0, 3.0)[2 * 1 * 0 - 2], make_array('h', 'e', 'l', 'l', 'o')[2 - 3]; ----- -2 2 o - -# single index with columns #1 (positive index) -query ?RT -select column1[2], column2[3], column3[1] from arrays; ----- -[3, NULL] 3.3 L -[5, 6] 6.6 i -[7, 8] 9.9 d -[9, 10] 12.2 s -NULL 15.5 a -[13, 14] NULL , -[NULL, 18] 18.8 NULL - -# single index with columns #2 (zero index) -query ?RT -select column1[0], column2[0], column3[0] from arrays; ----- -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL - -# single index with columns #3 (negative index) -query ?RT -select column1[-2], column2[-3], column3[-1] from arrays; ----- -[NULL, 2] 1.1 m -[3, 4] NULL m -[5, 6] 7.7 r -[7, NULL] 10.1 t -NULL 13.3 t -[11, 12] NULL , -[15, 16] 16.6 NULL - -# single index with columns #4 (complex index) -query ?RT -select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays; ----- -[3, NULL] NULL e -[5, 6] NULL u -[7, 8] NULL o -[9, 10] NULL i -NULL NULL e -[13, 14] NULL NULL -[NULL, 18] NULL NULL - -# TODO: support index as column -# single index with columns #5 (index as column) -# query ? -# select make_array(1, 2, 3, 4, 5)[column2] from arrays_with_repeating_elements; -# ---- - -# TODO: support argument and index as columns -# single index with columns #6 (argument and index as columns) -# query I -# select column1[column2] from arrays_with_repeating_elements; -# ---- - -## array[i:j] - -# multiple index with columns #1 (positive index) -query ??? -select make_array(1, 2, 3)[1:2], make_array(1.0, 2.0, 3.0)[2:3], make_array('h', 'e', 'l', 'l', 'o')[2:4]; ----- -[1, 2] [2.0, 3.0] [e, l, l] - -query ??? 
-select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1:2], - arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[2:3], - arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[2:4] -; ----- -[1, 2] [2, 3] [e, l, l] - -# multiple index with columns #2 (zero index) -query ??? -select make_array(1, 2, 3)[0:0], make_array(1.0, 2.0, 3.0)[0:2], make_array('h', 'e', 'l', 'l', 'o')[0:6]; ----- -[] [1.0, 2.0] [h, e, l, l, o] - -query ??? -select arrow_cast([1, 2, 3], 'LargeList(Int64)')[0:0], - arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[0:2], - arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[0:6] -; ----- -[] [1, 2] [h, e, l, l, o] - -query I -select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1]; ----- -1 - -# TODO: support multiple negative index -# multiple index with columns #3 (negative index) -# query II -# select make_array(1, 2, 3)[-3:-1], make_array(1.0, 2.0, 3.0)[-3:-1], make_array('h', 'e', 'l', 'l', 'o')[-2:0]; -# ---- - -# TODO: support complex index -# multiple index with columns #4 (complex index) -# query III -# select make_array(1, 2, 3)[2 + 1 - 1:10], make_array(1.0, 2.0, 3.0)[2 | 2:10], make_array('h', 'e', 'l', 'l', 'o')[6 ^ 6:10]; -# ---- - -# multiple index with columns #1 (positive index) -query ??? -select column1[2:4], column2[1:4], column3[3:4] from arrays; ----- -[[3, NULL]] [1.1, 2.2, 3.3] [r, e] -[[5, 6]] [NULL, 5.5, 6.6] [NULL, u] -[[7, 8]] [7.7, 8.8, 9.9] [l, o] -[[9, 10]] [10.1, NULL, 12.2] [t] -NULL [13.3, 14.4, 15.5] [e, t] -[[13, 14]] NULL [] -[[NULL, 18]] [16.6, 17.7, 18.8] NULL - -# multiple index with columns #2 (zero index) -query ??? 
-select column1[0:5], column2[0:3], column3[0:9] from arrays; ----- -[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] -[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] -NULL [13.3, 14.4, 15.5] [a, m, e, t] -[[11, 12], [13, 14]] NULL [,] -[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL - -# TODO: support negative index -# multiple index with columns #3 (negative index) -# query ?RT -# select column1[-2:-4], column2[-3:-5], column3[-1:-4] from arrays; -# ---- -# [NULL, 2] 1.1 m - -# TODO: support complex index -# multiple index with columns #4 (complex index) -# query ?RT -# select column1[9 - 7:2 + 2], column2[1 * 0:2 * 3], column3[1 + 1 - 0:5 % 3] from arrays; -# ---- - -# TODO: support first index as column -# multiple index with columns #5 (first index as column) -# query ? -# select make_array(1, 2, 3, 4, 5)[column2:4] from arrays_with_repeating_elements -# ---- - -# TODO: support last index as column -# multiple index with columns #6 (last index as column) -# query ?RT -# select make_array(1, 2, 3, 4, 5)[2:column3] from arrays_with_repeating_elements; -# ---- - -# TODO: support argument and indices as column -# multiple index with columns #7 (argument and indices as column) -# query ?RT -# select column1[column2:column3] from arrays_with_repeating_elements; -# ---- - -# array[i:j:k] - -# multiple index with columns #1 (positive index) -query ??? -select make_array(1, 2, 3)[1:2:2], make_array(1.0, 2.0, 3.0)[2:3:2], make_array('h', 'e', 'l', 'l', 'o')[2:4:2]; ----- -[1] [2.0] [e, l] - -# multiple index with columns #2 (zero index) -query ??? -select make_array(1, 2, 3)[0:0:2], make_array(1.0, 2.0, 3.0)[0:2:2], make_array('h', 'e', 'l', 'l', 'o')[0:6:2]; ----- -[] [1.0] [h, l, o] - -#TODO: sqlparser does not support negative index -## multiple index with columns #3 (negative index) -#query ??? 
-#select make_array(1, 2, 3)[-1:-2:-2], make_array(1.0, 2.0, 3.0)[-2:-3:-2], make_array('h', 'e', 'l', 'l', 'o')[-2:-4:-2]; -#---- -#[1] [2.0] [e, l] - -# multiple index with columns #1 (positive index) -query ??? -select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays; ----- -[[3, NULL]] [1.1, 3.3] [r] -[[5, 6]] [NULL, 6.6] [NULL] -[[7, 8]] [7.7, 9.9] [l] -[[9, 10]] [10.1, 12.2] [t] -NULL [13.3, 15.5] [e] -[[13, 14]] NULL [] -[[NULL, 18]] [16.6, 18.8] NULL - -# multiple index with columns #2 (zero index) -query ??? -select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays; ----- -[[NULL, 2]] [1.1, 3.3] [L, r, m] -[[3, 4]] [NULL, 6.6] [i, NULL, m] -[[5, 6]] [7.7, 9.9] [d, l, r] -[[7, NULL]] [10.1, 12.2] [s, t] -NULL [13.3, 15.5] [a, e] -[[11, 12]] NULL [,] -[[15, 16]] [16.6, 18.8] NULL - - -### Array function tests - - -## make_array (aliases: `make_list`) - -# make_array scalar function #1 -query ??? -select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); ----- -[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] - -# make_array scalar function #2 -query ??? -select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); ----- -[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] - -# make_array scalar function #3 -query ?? -select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); ----- -[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] - -# make_array scalar function #4 -query ?? -select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); ----- -[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] - -# make_array scalar function #5 -query ? -select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) ----- -[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] - -# make_array scalar function #6 -query ? 
-select make_array() ----- -[] - -# make_array scalar function #7 -query ?? -select make_array(make_array()), make_array(make_array(make_array())) ----- -[[]] [[[]]] - -# make_list scalar function #8 (function alias: `make_array`) -query ??? -select make_list(1, 2, 3), make_list(1.0, 2.0, 3.0), make_list('h', 'e', 'l', 'l', 'o'); ----- -[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] - -# make_array scalar function with nulls -query ??? -select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); ----- -[1, NULL, 3] [NULL, 2.0, NULL] [h, NULL, l, NULL, o] - -# make_array scalar function with nulls #2 -query ?? -select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); ----- -[1, 2, NULL] [[NULL, 2], [NULL, 3]] - -# make_array scalar function with nulls #3 -query ??? -select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); ----- -[NULL] [NULL, NULL, NULL] [[NULL, NULL], [NULL, NULL]] - -# make_array with 1 columns -query ??? -select make_array(a), make_array(d), make_array(e) from values; ----- -[1] [1.1] [Lorem] -[2] [2.2] [ipsum] -[3] [3.3] [dolor] -[4] [4.4] [sit] -[NULL] [5.5] [amet] -[5] [6.6] [,] -[6] [7.7] [consectetur] -[7] [NULL] [adipiscing] -[8] [8.8] [NULL] - -# make_array with 2 columns #1 -query ?? -select make_array(b, c), make_array(e, f) from values; ----- -[1, 2] [Lorem, A] -[3, 4] [ipsum, ] -[5, 6] [dolor, BB] -[7, 8] [sit, NULL] -[9, 10] [amet, CCC] -[NULL, 12] [,, DD] -[11, NULL] [consectetur, E] -[13, 14] [adipiscing, F] -[15, 16] [NULL, ] - -# make_array with 4 columns -query ? -select make_array(a, b, c, d) from values; ----- -[1.0, 1.0, 2.0, 1.1] -[2.0, 3.0, 4.0, 2.2] -[3.0, 5.0, 6.0, 3.3] -[4.0, 7.0, 8.0, 4.4] -[NULL, 9.0, 10.0, 5.5] -[5.0, NULL, 12.0, 6.6] -[6.0, 11.0, NULL, 7.7] -[7.0, 13.0, 14.0, NULL] -[8.0, 15.0, 16.0, 8.8] - -# make_array with column of list -query ?? 
-select column1, column5 from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 3] -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] [4, 5] -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7] -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9] - -# make array with arrays of different types -query ? -select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) ----- -[[1], [-1]] - -query T -select arrow_typeof(make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)'))); ----- -List(LargeList(Int64)) - - -query ??? -select make_array(column1), - make_array(column1, column5), - make_array(column1, make_array(50,51,52)) -from arrays_values_without_nulls; ----- -[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 3]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [50, 51, 52]] -[[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [4, 5]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [50, 51, 52]] -[[21, 22, 23, 24, 25, 26, 27, 28, 29, 30]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [50, 51, 52]] -[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]] - -## array_element (aliases: array_extract, list_extract, list_element) - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_element' does not support zero arguments -select array_element(); - -# array_element error -query error -select array_element(1, 2); - -# array_element with null -query I -select array_element([1, 2], NULL); ----- -NULL - -query ? 
-select array_element(NULL, 2); ----- -NULL - -# array_element scalar function #1 (with positive index) -query IT -select array_element(make_array(1, 2, 3, 4, 5), 2), array_element(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# array_element scalar function #2 (with positive index; out of bounds) -query IT -select array_element(make_array(1, 2, 3, 4, 5), 7), array_element(make_array('h', 'e', 'l', 'l', 'o'), 11); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); ----- -NULL NULL - -# array_element scalar function #3 (with zero) -query IT -select array_element(make_array(1, 2, 3, 4, 5), 0), array_element(make_array('h', 'e', 'l', 'l', 'o'), 0); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0); ----- -NULL NULL - -# array_element scalar function #4 (with NULL) -query IT -select array_element(make_array(1, 2, 3, 4, 5), NULL), 
array_element(make_array('h', 'e', 'l', 'l', 'o'), NULL); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), NULL); ----- -NULL NULL - -# array_element scalar function #5 (with negative index) -query IT -select array_element(make_array(1, 2, 3, 4, 5), -2), array_element(make_array('h', 'e', 'l', 'l', 'o'), -3); ----- -4 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3); ----- -4 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -3); ----- -4 l - -# array_element scalar function #6 (with negative index; out of bounds) -query IT -select array_element(make_array(1, 2, 3, 4, 5), -11), array_element(make_array('h', 'e', 'l', 'l', 'o'), -7); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -7); ----- -NULL NULL - -# array_element scalar function #7 (nested array) -query ? -select array_element(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1); ----- -[1, 2, 3, 4, 5] - -query ? 
-select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1); ----- -[1, 2, 3, 4, 5] - -query ? -select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'FixedSizeList(2, List(Int64))'), 1); ----- -[1, 2, 3, 4, 5] - -# array_extract scalar function #8 (function alias `array_element`) -query IT -select array_extract(make_array(1, 2, 3, 4, 5), 2), array_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# list_element scalar function #9 (function alias `array_element`) -query IT -select list_element(make_array(1, 2, 3, 4, 5), 2), list_element(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), list_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# list_extract scalar function #10 (function alias `array_element`) -query IT -select list_extract(make_array(1, 2, 3, 4, 5), 2), list_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 
'FixedSizeList(5, Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# array_element with columns -query I -select array_element(column1, column2) from slices; ----- -NULL -12 -NULL -37 -NULL -NULL -55 - -query I -select array_element(arrow_cast(column1, 'LargeList(Int64)'), column2) from slices; ----- -NULL -12 -NULL -37 -NULL -NULL -55 - -query I -select array_element(column1, column2) from fixed_slices; ----- -NULL -12 -NULL -37 -NULL -55 - -# array_element with columns and scalars -query II -select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from slices; ----- -1 3 -2 13 -NULL 23 -2 33 -4 NULL -NULL 43 -5 NULL - -query II -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_element(arrow_cast(column1, 'LargeList(Int64)'), 3) from slices; ----- -1 3 -2 13 -NULL 23 -2 33 -4 NULL -NULL 43 -5 NULL - -query II -select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from fixed_slices; ----- -1 3 -2 13 -NULL 23 -2 33 -NULL 43 -5 NULL - -# array_element of empty array -query T -select coalesce(array_element([], 1), array_element(NULL, 1), 'ok'); ----- -ok - - -## array_max -# array_max scalar function #1 (with positive index) -query I -select array_max(make_array(5, 3, 6, 4)); ----- -6 - -query I -select array_max(make_array(5, 3, 4, NULL, 6, NULL)); ----- -6 - -query ? 
-select array_max(make_array(NULL, NULL)); ----- -NULL - -query T -select array_max(make_array('h', 'e', 'o', 'l', 'l')); ----- -o - -query T -select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); ----- -o - -query B -select array_max(make_array(false, true, false, true)); ----- -true - -query B -select array_max(make_array(false, true, NULL, false, true)); ----- -true - -query D -select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); ----- -1999-05-01 - -query D -select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); ----- -1999-05-01 - -query P -select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); ----- -1995-06-01T00:00:00 - -query P -select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); ----- -1996-10-01T00:00:00 - -query R -select array_max(make_array(5.1, -3.2, 6.3, 4.9)); ----- -6.3 - -query ?I -select input, array_max(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) ----- -[-1, 0, 1] 1 -[9, 10, 11] 11 -[19, 20, 21] 21 -[29, 30, 31] 31 -[NULL, NULL, NULL] NULL - -query II -select array_max(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_max(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -3 1 - -query II -select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -3 1 - -query ? 
-select array_max(make_array()); ----- -NULL - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_max' does not support zero arguments -select array_max(); - -# array_max over multiple rows (exercises the offsets-based iteration) -query I -select array_max(column1) from (values - (make_array(1, 5, 3)), - (make_array(10, 2, 8)), - (NULL), - (make_array(NULL, 7, NULL)), - (make_array(100)) -) as t(column1); ----- -5 -10 -NULL -7 -100 - -# array_max with NaN values (NaN should not be returned as max) -query R -select array_max(make_array(1.0, 'NaN'::double, 3.0)); ----- -NaN - -query R -select array_max(make_array('NaN'::double, 'NaN'::double)); ----- -NaN - -query R -select array_max(make_array('NaN'::double, NULL)); ----- -NaN - -# array_max with Int32 (exercises a different primitive type than Int64) -query I -select array_max(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); ----- -10 - -## array_min - -query I -select array_min(make_array(5, 3, 6, 4)); ----- -3 - -query I -select array_min(make_array(5, 3, 4, NULL, 6, NULL)); ----- -3 - -query ? 
-select array_min(make_array(NULL, NULL)); ----- -NULL - -query T -select array_min(make_array('h', 'e', 'o', 'l', 'l')); ----- -e - -query T -select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); ----- -e - -query B -select array_min(make_array(false, true, false, true)); ----- -false - -query B -select array_min(make_array(false, true, NULL, false, true)); ----- -false - -query D -select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); ----- -1985-11-01 - -query D -select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); ----- -1993-03-01 - -query P -select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); ----- -1984-10-01T00:00:00 - -query P -select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); ----- -1995-06-01T00:00:00 - -query R -select array_min(make_array(5.1, -3.2, 6.3, 4.9)); ----- --3.2 - -query ?I -select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) ----- -[-1, 0, 1] -1 -[9, 10, 11] 9 -[19, 20, 21] 19 -[29, 30, 31] 29 -[NULL, NULL, NULL] NULL - -query II -select array_min(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_min(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -1 1 - -query II -select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -1 1 - -query ? 
-select array_min(make_array()); ----- -NULL - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_min' does not support zero arguments -select array_min(); - -# array_min over multiple rows (exercises the offsets-based iteration) -query I -select array_min(column1) from (values - (make_array(1, 5, 3)), - (make_array(10, 2, 8)), - (NULL), - (make_array(NULL, 7, NULL)), - (make_array(100)) -) as t(column1); ----- -1 -2 -NULL -7 -100 - -# array_min with NaN values (NaN should not be returned as min) -query R -select array_min(make_array(1.0, 'NaN'::double, 3.0)); ----- -1 - -query R -select array_min(make_array('NaN'::double, 'NaN'::double)); ----- -NaN - -query R -select array_min(make_array('NaN'::double, NULL)); ----- -NaN - -# array_min with Int32 (exercises a different primitive type than Int64) -query I -select array_min(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); ----- --5 - -# array_min/array_max preserve parameterized primitive metadata -query PPTT -select - array_min(ts_list), - array_max(ts_list), - arrow_typeof(array_min(ts_list)), - arrow_typeof(array_max(ts_list)) -from ( - select arrow_cast( - make_array( - arrow_cast(20, 'Timestamp(Nanosecond, Some("UTC"))'), - arrow_cast(10, 'Timestamp(Nanosecond, Some("UTC"))'), - arrow_cast(30, 'Timestamp(Nanosecond, Some("UTC"))') - ), - 'List(Timestamp(Nanosecond, Some("UTC")))' - ) as ts_list -) t; ----- -1970-01-01T00:00:00.000000010Z 1970-01-01T00:00:00.000000030Z Timestamp(ns, "UTC") Timestamp(ns, "UTC") - -query RRTT -select - array_min(dec_list), - array_max(dec_list), - arrow_typeof(array_min(dec_list)), - arrow_typeof(array_max(dec_list)) -from ( - select arrow_cast( - make_array( - arrow_cast(200, 'Decimal128(20, 4)'), - arrow_cast(100, 'Decimal128(20, 4)'), - arrow_cast(300, 'Decimal128(20, 4)') - ), - 'List(Decimal128(20, 4))' - ) as dec_list -) t; ----- -100 300 Decimal128(20, 4) Decimal128(20, 4) - - -## array_pop_back 
(aliases: `list_pop_back`) - -# array_pop_back scalar function with null -#TODO: https://github.com/apache/datafusion/issues/7142 -# follow clickhouse and duckdb -#query ? -#select array_pop_back(null); -#---- -#NULL - -# array_pop_back scalar function #1 -query ?? -select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [h, e, l, l] - -query ?? -select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [h, e, l, l] - -query ?? -select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); ----- -[1, 2, 3, 4] [h, e, l, l] - -# array_pop_back scalar function #2 (after array_pop_back, array is empty) -query ? -select array_pop_back(make_array(1)); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -[] - -# array_pop_back scalar function #3 (array_pop_back the empty array) -query ? -select array_pop_back(array_pop_back(make_array(1))); ----- -[] - -query ? -select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)'))); ----- -[] - -query ? -select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); ----- -[] - -# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) -query ?? -select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); ----- -[1, 2, 3, 4] [NULL, e, l, NULL] - -query ?? 
-select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [NULL, e, l, NULL] - -query ?? -select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); ----- -[1, 2, 3, 4] [NULL, e, l, NULL] - -# array_pop_back scalar function #5 (array_pop_back the nested arrays) -query ? -select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -# array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) -query ? -select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? 
-select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -# array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) -query ? -select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] - -# array_pop_back scalar function #8 (after array_pop_back, nested array is empty) -query ? -select array_pop_back(make_array(make_array(1, 2, 3))); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); ----- -[] - -# array_pop_back with columns -query ? -select array_pop_back(column1) from arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -query ? -select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -query ? -select array_pop_back(column1) from large_arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -query ? 
-select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -## array_pop_front (aliases: `list_pop_front`) - -#TODO:https://github.com/apache/datafusion/issues/7142 -# array_pop_front scalar function with null -# follow clickhouse and duckdb -#query ? -#select array_pop_front(null); -#---- -#NULL - -# array_pop_front scalar function #1 -query ?? -select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); ----- -[2, 3, 4, 5] [e, l, l, o] - -query ?? -select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[2, 3, 4, 5] [e, l, l, o] - -query ?? -select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); ----- -[2, 3, 4, 5] [e, l, l, o] - -# array_pop_front scalar function #2 (after array_pop_front, array is empty) -query ? -select array_pop_front(make_array(1)); ----- -[] - -query ? -select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -[] - -query ? -select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -[] - -# array_pop_front scalar function #3 (array_pop_front the empty array) -query ? -select array_pop_front(array_pop_front(make_array(1))); ----- -[] - -query ? -select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)'))); ----- -[] - -query ? -select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); ----- -[] - -# array_pop_front scalar function #5 (array_pop_front the nested arrays) -query ? 
-select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); ----- -[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] - -query ? -select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); ----- -[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] - -query ? -select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); ----- -[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] - -# array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) -query ? -select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -# array_pop_front scalar function #8 (after array_pop_front, nested array is empty) -query ? -select array_pop_front(make_array(make_array(1, 2, 3))); ----- -[] - -query ? -select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); ----- -[] - -query ? 
-select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); ----- -[] - -## array_slice (aliases: list_slice) - -# array_slice scalar function #1 (with positive indexes) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); ----- -[2, 3, 4] [h, e] - -query ???? -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 2), - array_slice(make_array(1, 2, 3, 4, 5), 0, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5, 2); ----- -[1, 3, 5] [h, l, o] [1, 3, 5] [h, l, o] - -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, -1); ----- -[] [] - -query error Execution error: array_slice got invalid stride: 0, it cannot be 0 -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 0); - -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 5, 1, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 5, 1, -2); ----- -[5, 3, 1] [o, l, h] - -# Test NULL stride -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, NULL); ----- -NULL NULL - -# Test NULL stride -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1, 5, NULL), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 5, NULL); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); ----- -[2, 3, 4] [h, e] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2, 4), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 1, 2); ----- -[2, 3, 4] [h, e] - -# array_slice scalar function #2 (with positive indexes; full array) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 5); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -# TODO make error message nicer: https://github.com/apache/datafusion/issues/19004 -# Expected output (once supported): -# ---- -# [1, 2, 3, 4, 5] [h, e, l, l, o] -query error Failed to coerce arguments to satisfy a call to 'array_slice' function: -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)'), 0, 6), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'ListView(Utf8)'), 0, 5); - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0, 6), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0, 5); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -# array_slice scalar function #3 (with positive indexes; first index = second index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 4, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 3); ----- -[4] [l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 4, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 3); ----- -[4] [l] - -# array_slice scalar function #4 (with positive indexes; first index > second_index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 4, 1); ----- -[] [] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 4, 1); ----- -[] [] - -# array_slice scalar function #5 (with positive indexes; out of bounds) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 7); ----- -[2, 3, 4, 5] [l, l, o] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 7); ----- -[2, 3, 4, 5] [l, l, o] - -# TODO: Enable once array_slice supports LargeListView types. -# Expected output (once supported): -# ---- -# [2, 3, 4, 5] [l, l, o] -query error Failed to coerce arguments to satisfy a call to 'array_slice' function: -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeListView(Int64)'), 2, 6), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeListView(Utf8)'), 3, 7); - - -# array_slice scalar function #6 (with positive indexes; nested array) -query ? -select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1, 1); ----- -[[1, 2, 3, 4, 5]] - -query ? -select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1, 1); ----- -[[1, 2, 3, 4, 5]] - -# array_slice scalar function #7 (with zero and positive number) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 3); ----- -[1, 2, 3, 4] [h, e, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 3); ----- -[1, 2, 3, 4] [h, e, l] - -# array_slice scalar function #8 (with NULL and positive number) -query ?? 
-select array_slice(make_array(1, 2, 3, 4, 5), NULL, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, 3); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, 3); ----- -NULL NULL - -# array_slice scalar function #9 (with positive number and NULL) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, NULL); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, NULL); ----- -NULL NULL - -# array_slice scalar function #10 (with zero-zero) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 0); ----- -[] [] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 0), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 0); ----- -[] [] - -# array_slice scalar function #11 (with NULL-NULL) -query error -select array_slice(make_array(1, 2, 3, 4, 5), NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL); - -query error -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); - -# array_slice scalar function #12 (with zero and negative number) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, -3); ----- -[1, 2] [h, e, l] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, -3); ----- -[1, 2] [h, e, l] - -# array_slice scalar function #13 (with negative number and NULL) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, NULL); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, NULL); ----- -NULL NULL - -# array_slice scalar function #14 (with NULL and negative number) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), NULL, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, -3); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, -3); ----- -NULL NULL - -# array_slice scalar function #15 (with negative indexes) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -4, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -1); ----- -[2, 3, 4, 5] [l, l, o] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -1); ----- -[2, 3, 4, 5] [l, l, o] - -# array_slice scalar function #16 (with negative indexes; almost full array (only with negative indices cannot return full array)) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -5, -1); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -5, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -5, -1); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -# array_slice scalar function #17 (with negative indexes; first index = second index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -4, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -3); ----- -[2] [l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -3); ----- -[2] [l] - -# array_slice scalar function #18 (with negative indexes; first index > second_index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -4, -6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -6); ----- -[] [] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -6); ----- -[] [] - -# array_slice scalar function #19 (with negative indexes; out of bounds) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -7, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -7, -3); ----- -[1, 2, 3, 4] [h, e, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -7, -2), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7, -3); ----- -[1, 2, 3, 4] [h, e, l] - -# array_slice scalar function #20 (with negative indexes; nested array) -query ?? -select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), -2, -1), array_slice(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), -1, -1); ----- -[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] - -query ?? 
-select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), -2, -1), array_slice(arrow_cast(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), 'LargeList(List(Int64))'), -1, -1); ----- -[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] - - -# array_slice scalar function #21 (with first positive index and last negative index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, -3), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, -2); ----- -[2, 3] [e, l, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, -3), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, -2); ----- -[2, 3] [e, l, l] - -# array_slice scalar function #22 (with first negative index and last positive index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -2, 5), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, 4); ----- -[4, 5] [l, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, 5), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, 4); ----- -[4, 5] [l, l] - -# list_slice scalar function #23 (function alias `array_slice`) -query ?? -select list_slice(make_array(1, 2, 3, 4, 5), 2, 4), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); ----- -[2, 3, 4] [h, e] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); ----- -[2, 3, 4] [h, e] - -# array_slice scalar function #24 (with first negative index larger than len) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -2147483648, 1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), -2147483648, 1); ----- -[1] [h] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -9223372036854775808, 1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -9223372036854775808, 1); ----- -[1] [h] - -# array_slice scalar function #25 (with negative step and equal indexes) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 2, -1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, 2, -1); ----- -[2] [e] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 2, -1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, 2, -1); ----- -[2] [e] - -# array_slice with columns -query ? -select array_slice(column1, column2, column3) from slices; ----- -[NULL] -[12, 13, 14, 15, 16, 17] -[] -[] -NULL -NULL -NULL - -query ? -select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from slices; ----- -[NULL] -[12, 13, 14, 15, 16, 17] -[] -[] -NULL -NULL -NULL - -# TODO: support NULLS in output instead of `[]` -# array_slice with columns and scalars -query ??? -select array_slice(make_array(1, 2, 3, 4, 5), column2, column3), array_slice(column1, 3, column3), array_slice(column1, column2, 5) from slices; ----- -[1] [] [NULL, 2, 3, 4, 5] -[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] -[] [] [21, 22, 23, NULL, 25] -[] [33, 34] [] -[4, 5] NULL NULL -NULL [43, 44, 45, 46] NULL -NULL NULL [55] - -query ??? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), 3, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, 5) from slices; ----- -[1] [] [NULL, 2, 3, 4, 5] -[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] -[] [] [21, 22, 23, NULL, 25] -[] [33, 34] [] -[4, 5] NULL NULL -NULL [43, 44, 45, 46] NULL -NULL NULL [55] - -# Test issue: https://github.com/apache/datafusion/issues/10425 -# `from` may be larger than `to` and `stride` is positive -query ???? 
-select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), - array_slice(a, 3, 2, 1), array_slice(a, 3, 2) - from (values ([1.0, 2.0, 3.0, 3.0]), ([4.0, 5.0, 3.0]), ([6.0])) t(a); ----- -[] [] [] [] -[] [] [] [] -[6.0] [6.0] [] [] - -# array_slice with overlapping nulls across multiple inputs -query ? -select array_slice(column1, column2, column3) from ( - values - (make_array(1, 2, 3), NULL, NULL), - (NULL, NULL, 3), - (NULL, 1, NULL), - (make_array(4, 5, 6), 1, 3) -) as t(column1, column2, column3); ----- -NULL -NULL -NULL -[4, 5, 6] - -query ? -select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from ( - values - (make_array(1, 2, 3), NULL, NULL), - (NULL, NULL, 3), - (NULL, 1, NULL), - (make_array(4, 5, 6), 1, 3) -) as t(column1, column2, column3); ----- -NULL -NULL -NULL -[4, 5, 6] - -# array_slice with overlapping nulls including stride -query ? -select array_slice(column1, column2, column3, column4) from ( - values - (make_array(1, 2, 3, 4, 5), 1, 5, NULL), - (NULL, NULL, 3, 2), - (make_array(1, 2, 3, 4, 5), NULL, NULL, NULL), - (make_array(1, 2, 3, 4, 5), 1, 5, 2) -) as t(column1, column2, column3, column4); ----- -NULL -NULL -NULL -[1, 3, 5] - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_slice' does not support zero arguments -select array_slice(); - -query error Failed to coerce arguments -select array_slice(3.5, NULL, NULL); - -## array_any_value (aliases: list_any_value) - -# Testing with empty arguments should result in an error -query error -select array_any_value(); - -# Testing with non-array arguments should result in an error -query error -select array_any_value(1), array_any_value('a'), array_any_value(NULL); - -# array_any_value scalar function #1 (with null and non-null elements) - -query IT?I -select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')), array_any_value(make_array(NULL, 
NULL)), array_any_value(make_array(NULL, NULL, 1, 2, 3)); ----- -1 h NULL 1 - -query ITITI -select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3), 'LargeList(Int64)'));; ----- -1 h NULL NULL 1 - -query ITITI -select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3, 4), 'FixedSizeList(6, Int64)')); ----- -1 h NULL NULL 1 - -# array_any_value scalar function #2 (with nested array) - -query ? -select array_any_value(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10))); ----- -[NULL, 1, 2, 3, 4, 5] - -query ? -select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'LargeList(List(Int64))')); ----- -[NULL, 1, 2, 3, 4, 5] - -query ? 
-select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'FixedSizeList(3, List(Int64))')); ----- -[NULL, 1, 2, 3, 4, 5] - -# array_any_value scalar function #3 (using function alias `list_any_value`) -query IT -select list_any_value(make_array(NULL, 1, 2, 3, 4, 5)), list_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')); ----- -1 h - -query IT -select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -1 h - -query IT -select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')); ----- -1 h - -# array_any_value with columns - -query I -select array_any_value(column1) from slices; ----- -2 -11 -21 -31 -NULL -41 -51 - -query I -select array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; ----- -2 -11 -21 -31 -NULL -41 -51 - -query I -select array_any_value(column1) from fixed_slices; ----- -2 -11 -21 -31 -41 -51 - -# array_any_value with columns and scalars - -query II -select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from slices; ----- -1 2 -1 11 -1 21 -1 31 -1 NULL -1 41 -1 51 - -query II -select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; ----- -1 2 -1 11 -1 21 -1 31 -1 NULL -1 41 -1 51 - -query II -select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from fixed_slices; ----- -1 2 -1 11 -1 21 -1 31 -1 41 -1 51 - -# make_array with nulls -query ??????? 
-select make_array(make_array('a','b'), null), - make_array(make_array('a','b'), null, make_array('c','d')), - make_array(null, make_array('a','b'), null), - make_array(null, make_array('a','b'), null, null, make_array('c','d')), - make_array(['a', 'bc', 'def'], null, make_array('rust')), - make_array([1,2,3], null, make_array(4,5,6,7)), - make_array(null, 1, null, 2, null, 3, null, null, 4, 5); ----- -[[a, b], NULL] [[a, b], NULL, [c, d]] [NULL, [a, b], NULL] [NULL, [a, b], NULL, NULL, [c, d]] [[a, bc, def], NULL, [rust]] [[1, 2, 3], NULL, [4, 5, 6, 7]] [NULL, 1, NULL, 2, NULL, 3, NULL, NULL, 4, 5] - -query ? -select make_array(column5, null, column5) from arrays_values_without_nulls; ----- -[[2, 3], NULL, [2, 3]] -[[4, 5], NULL, [4, 5]] -[[6, 7], NULL, [6, 7]] -[[8, 9], NULL, [8, 9]] - -query ? -select make_array(['a','b'], null); ----- -[[a, b], NULL] - -## array_sort (aliases: `list_sort`) -query ??? -select array_sort(make_array(1, 3, null, 5, NULL, -5)), array_sort(make_array(1, 3, null, 2), 'ASC'), array_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - -query ??? -select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'LargeList(Int64)')), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'ASC'), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - -query ??? -select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'FixedSizeList(6, Int64)')), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'ASC'), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - -query ? 
-select array_sort(column1, 'DESC', 'NULLS LAST') from arrays_values; ----- -[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] -[20, 18, 17, 16, 15, 14, 13, 12, 11, NULL] -[30, 29, 28, 27, 26, 25, 23, 22, 21, NULL] -[40, 39, 38, 37, 35, 34, 33, 32, 31, NULL] -NULL -[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] -[60, 59, 58, 57, 56, 55, 54, 52, 51, NULL] -[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] - -query ? -select array_sort(column1, 'ASC', 'NULLS FIRST') from arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[NULL, 11, 12, 13, 14, 15, 16, 17, 18, 20] -[NULL, 21, 22, 23, 25, 26, 27, 28, 29, 30] -[NULL, 31, 32, 33, 34, 35, 37, 38, 39, 40] -NULL -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[NULL, 51, 52, 54, 55, 56, 57, 58, 59, 60] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -# test with empty table -query ? -select array_sort(column1, 'DESC', 'NULLS FIRST') from arrays_values where false; ----- - -# test with empty array -query ? -select array_sort([]); ----- -[] - -# empty-but-non-null string arrays should remain non-null, not become null -query ?B -select array_sort(column1), array_sort(column1) is null -from (values (arrow_cast(make_array('b', 'a'), 'List(Utf8)')), (arrow_cast([], 'List(Utf8)'))) as t(column1); ----- -[a, b] false -[] false - -# test with null arguments -query ? -select array_sort(NULL); ----- -NULL - -query ? -select array_sort(column1, NULL) from arrays_values; ----- -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -query ?? -select array_sort(column1, 'DESC', NULL), array_sort(column1, 'ASC', NULL) from arrays_values; ----- -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL - -query ?? 
-select array_sort(column1, NULL, 'NULLS FIRST'), array_sort(column1, NULL, 'NULLS LAST') from arrays_values; ----- -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL - -# maintains inner nullability -query ?T -select array_sort(column1), arrow_typeof(array_sort(column1)) -from values - (arrow_cast([], 'List(non-null Int32)')), - (arrow_cast(NULL, 'List(non-null Int32)')), - (arrow_cast([1, 3, 5, -5], 'List(non-null Int32)')) -; ----- -[] List(non-null Int32) -NULL List(non-null Int32) -[-5, 1, 3, 5] List(non-null Int32) - -query ?T -select column1, arrow_typeof(column1) -from values (array_sort(arrow_cast([1, 3, 5, -5], 'LargeList(non-null Int32)'))); ----- -[-5, 1, 3, 5] LargeList(non-null Int32) - -query ?T -select column1, arrow_typeof(column1) -from values (array_sort(arrow_cast([1, 3, 5, -5], 'FixedSizeList(4 x non-null Int32)'))); ----- -[-5, 1, 3, 5] List(non-null Int32) - -# arrays of strings -query ??? -select array_sort(make_array('banana', 'apple', null, 'cherry')), - array_sort(make_array('banana', 'apple', null, 'cherry'), 'DESC', 'NULLS LAST'), - array_sort(make_array('banana', 'apple', null, 'cherry'), 'ASC', 'NULLS LAST'); ----- -[NULL, apple, banana, cherry] [cherry, banana, apple, NULL] [apple, banana, cherry, NULL] - -query ? 
-select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)]) ----- -[{c0: bar, c1: 1}, {c0: foo, c1: 1}, {c0: foo, c1: 3}] - -## test with argument of incorrect types -query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC -select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1); - -# test with empty row, the row that does not match the condition has row count 0 -statement ok -create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); - -# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. -query ? rowsort -select array_sort([sum(a)]) from t1 where a > 100 group by b; ----- -[102] -[202] - -statement ok -drop table t1; - -# float arrays with NaN and Infinity (NaN sorts after Infinity per IEEE totalOrder) -query ??? -select array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null)), - array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null), 'DESC', 'NULLS LAST'), - array_sort(make_array('NaN'::double, 'NaN'::double, 1.0)); ----- -[NULL, -inf, -1.0, 1.0, inf, NaN] [NaN, inf, 1.0, -1.0, -inf, NULL] [1.0, NaN, NaN] - -# float32 arrays -query ?? -select array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)')), - array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)'), 'DESC', 'NULLS LAST'); ----- -[NULL, 1.0, 2.0, 3.0, NaN] [NaN, 3.0, 2.0, 1.0, NULL] - -# element-level nulls with all sort option combinations -query ???? 
-select array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS FIRST'), - array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS LAST'), - array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS FIRST'), - array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS LAST'); ----- -[NULL, NULL, 1, 2, 3] [1, 2, 3, NULL, NULL] [NULL, NULL, 3, 2, 1] [3, 2, 1, NULL, NULL] - -# timestamp arrays -query ?? -select array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), - arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), - null, - arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)'))), - array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), - arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), - null, - arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)')), 'DESC', 'NULLS LAST'); ----- -[NULL, 2024-01-01T00:00:00, 2024-01-15T10:00:00, 2024-06-15T12:00:00] [2024-06-15T12:00:00, 2024-01-15T10:00:00, 2024-01-01T00:00:00, NULL] - -# date arrays -query ?? -select array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date)), - array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date), 'DESC', 'NULLS LAST'); ----- -[NULL, 2024-01-01, 2024-02-01, 2024-03-01] [2024-03-01, 2024-02-01, 2024-01-01, NULL] - -# struct arrays with nulls and DESC -query ?? -select array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)]), - array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)], 'DESC', 'NULLS LAST'); ----- -[NULL, {c0: a, c1: 1}, {c0: a, c1: 3}, {c0: b, c1: 2}] [{c0: b, c1: 2}, {c0: a, c1: 3}, {c0: a, c1: 1}, NULL] - -# boolean arrays -query ?? -select array_sort(make_array(true, false, null, true, false)), - array_sort(make_array(true, false, null, true, false), 'DESC', 'NULLS LAST'); ----- -[NULL, false, false, true, true] [true, true, false, false, NULL] - -# all-null array -query ? 
-select array_sort(make_array(null, null, null)); ----- -[NULL, NULL, NULL] - -# single-element arrays -query ?? -select array_sort(make_array(42)), array_sort(make_array(null::int)); ----- -[42] [NULL] - -## list_sort (aliases: `array_sort`) -query ??? -select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3, null, 2), 'ASC'), list_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - - -## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) - -# array_append with NULLs - -query ? -select array_append(null, 1); ----- -[1] - -query ? -select array_append(null, [2, 3]); ----- -[[2, 3]] - -query ? -select array_append(null, [[4]]); ----- -[[[4]]] - -query ???? -select - array_append(make_array(), 4), - array_append(make_array(), null), - array_append(make_array(1, null, 3), 4), - array_append(make_array(null, null), 1) -; ----- -[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] - -query ???? -select - array_append(arrow_cast(make_array(), 'LargeList(Int64)'), 4), - array_append(arrow_cast(make_array(), 'LargeList(Int64)'), null), - array_append(arrow_cast(make_array(1, null, 3), 'LargeList(Int64)'), 4), - array_append(arrow_cast(make_array(null, null), 'LargeList(Int64)'), 1) -; ----- -[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] - -query ?? -select - array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), - array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) -; ----- -[1, NULL, 3, 4] [NULL, NULL, 1] - -# test invalid (non-null) -query error -select array_append(1, 2); - -query error -select array_append(1, [2]); - -query error -select array_append([1], [2]); - -query ?? -select - array_append(make_array(make_array(1, null, 3)), make_array(null)), - array_append(make_array(make_array(1, null, 3)), null); ----- -[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] - -query ?? 
-select - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(null), 'LargeList(Int64)')), - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), null); ----- -[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] - -query ?? -select - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); ----- -[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] - -# array_append scalar function #3 -query ??? -select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_append scalar function #4 (element is list) -query ??? -select array_append(make_array([1], [2], [3]), make_array(4)), array_append(make_array([1.0], [2.0], [3.0]), make_array(4.0)), array_append(make_array(['h'], ['e'], ['l'], ['l']), make_array('o')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? 
-select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(4), 'LargeList(Int64)')), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'LargeList(LargeList(Float64))'), arrow_cast(make_array(4.0), 'LargeList(Float64)')), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'LargeList(LargeList(Utf8))'), arrow_cast(make_array('o'), 'LargeList(Utf8)')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -# list_append scalar function #5 (function alias `array_append`) -query ??? -select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_push_back scalar function #6 (function alias `array_append`) -query ??? -select array_push_back(make_array(1, 2, 3), 4), array_push_back(make_array(1.0, 2.0, 3.0), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? 
-select array_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# list_push_back scalar function #7 (function alias `array_append`) -query ??? -select list_push_back(make_array(1, 2, 3), 4), list_push_back(make_array(1.0, 2.0, 3.0), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_append with columns #1 -query ? -select array_append(column1, column2) from arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] -[44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] - -query ? -select array_append(column1, column2) from large_arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] -[44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] - -query ? 
-select array_append(column1, column2) from fixed_arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] -[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] - -# array_append with columns #2 (element is list) -query ? -select array_append(column1, column2) from nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] - -query ? -select array_append(column1, column2) from large_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] - -query ? -select array_append(column1, column2) from fixed_size_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] - -# array_append with columns and scalars #1 -query ?? -select array_append(column2, 100.1), array_append(column3, '.') from arrays; ----- -[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] -[10.1, NULL, 12.2, 100.1] [s, i, t, .] -[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] -[100.1] [,, .] -[16.6, 17.7, 18.8, 100.1] [.] - -query ?? -select array_append(column2, 100.1), array_append(column3, '.') from large_arrays; ----- -[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] -[10.1, NULL, 12.2, 100.1] [s, i, t, .] -[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] 
-[100.1] [,, .] -[16.6, 17.7, 18.8, 100.1] [.] - -query ?? -select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; ----- -[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] -[10.1, NULL, 12.2, 100.1] [s, i, t, a, b, .] -[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] -[NULL, NULL, NULL, 100.1] [,, a, b, c, d, .] -[16.6, 17.7, 18.8, 100.1] [NULL, NULL, NULL, NULL, NULL, .] - -# array_append with columns and scalars #2 -query ?? -select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] - -query ?? -select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))'), column2) from large_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] - -query ?? 
-select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] - -## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) - -# array_prepend with NULLs - -# DuckDB: [4] -# ClickHouse: Null -query ? -select array_prepend(4, NULL); ----- -[4] - -query ? -select array_prepend(4, []); ----- -[4] - -query ? -select array_prepend(4, [null]); ----- -[4, NULL] - -# DuckDB: [null] -# ClickHouse: [null] -query ? -select array_prepend(null, []); ----- -[NULL] - -query ? -select array_prepend(null, [1]); ----- -[NULL, 1] - -query ? -select array_prepend(null, [[1,2,3]]); ----- -[NULL, [1, 2, 3]] - -# DuckDB: [[]] -# ClickHouse: [[]] -# TODO: We may also return [[]] -query ? -select array_prepend([], []); ----- -[[]] - -query ? -select array_prepend(null, null); ----- -[NULL] - -query ? -select array_append([], null); ----- -[NULL] - - -# array_prepend scalar function #3 -query ??? -select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? 
-select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_prepend scalar function #4 (element is list) -query ??? -select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(LargeList(Int64))')), - array_prepend(arrow_cast(make_array(1.0), 'LargeList(Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(LargeList(Float64))')), - array_prepend(arrow_cast(make_array('h'), 'LargeList(Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(LargeList(Utf8))')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), - array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), - array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); ----- -[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -# list_prepend scalar function #5 (function alias `array_prepend`) -query ??? 
-select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_push_front scalar function #6 (function alias `array_prepend`) -query ??? -select array_push_front(1, make_array(2, 3, 4)), array_push_front(1.0, make_array(2.0, 3.0, 4.0)), array_push_front('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# list_push_front scalar function #7 (function alias `array_prepend`) -query ??? -select list_push_front(1, make_array(2, 3, 4)), list_push_front(1.0, make_array(2.0, 3.0, 4.0)), list_push_front('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_prepend scalar function #7 (element is fixed size list) -query ??? 
-select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), make_array(arrow_cast(make_array(2), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(3), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(4), 'FixedSizeList(1, Int64)'))), - array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), make_array(arrow_cast([2.0], 'FixedSizeList(1, Float64)'), arrow_cast([3.0], 'FixedSizeList(1, Float64)'), arrow_cast([4.0], 'FixedSizeList(1, Float64)'))), - array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), make_array(arrow_cast(['e'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['o'], 'FixedSizeList(1, Utf8)'))); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(FixedSizeList(1, Int64))')), - array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(FixedSizeList(1, Float64))')), - array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(FixedSizeList(1, Utf8))')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? 
-select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, FixedSizeList(1, Int64))')), - array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, FixedSizeList(1, Float64))')), - array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, FixedSizeList(1, Utf8))')); ----- -[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -# array_prepend with columns #1 -query ? -select array_prepend(column2, column1) from arrays_values; ----- -[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -[44] -[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -query ? -select array_prepend(column2, column1) from large_arrays_values; ----- -[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -[44] -[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -query ? -select array_prepend(column2, column1) from fixed_arrays_values; ----- -[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -[44, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] -[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -# array_prepend with columns #2 (element is list) -query ? 
-select array_prepend(column2, column1) from nested_arrays; ----- -[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] -[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] - -query ? -select array_prepend(column2, column1) from large_nested_arrays; ----- -[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] -[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] - -query ? -select array_prepend(column2, column1) from fixed_size_nested_arrays; ----- -[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] -[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] - -# array_prepend with columns and scalars #1 -query ?? -select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; ----- -[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] -[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] -[100.1, 10.1, NULL, 12.2] [., s, i, t] -[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] -[100.1] [., ,] -[100.1, 16.6, 17.7, 18.8] [.] - -query ?? -select array_prepend(100.1, column2), array_prepend('.', column3) from large_arrays; ----- -[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] -[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] -[100.1, 10.1, NULL, 12.2] [., s, i, t] -[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] -[100.1] [., ,] -[100.1, 16.6, 17.7, 18.8] [.] - -query ?? 
-select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; ----- -[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] -[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] -[100.1, 10.1, NULL, 12.2] [., s, i, t, a, b] -[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] -[100.1, NULL, NULL, NULL] [., ,, a, b, c, d] -[100.1, 16.6, 17.7, 18.8] [., NULL, NULL, NULL, NULL, NULL] - -# array_prepend with columns and scalars #2 (element is list) -query ?? -select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; ----- -[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] -[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] - -query ?? -select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))')) from large_nested_arrays; ----- -[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] -[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] - -query ?? 
-select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; ----- -[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] -[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] - -## array_repeat (aliases: `list_repeat`) - -# array_repeat scalar function #1 -query ???????? -select - array_repeat(1, 5), - array_repeat(3.14, 3), - array_repeat('l', 4), - array_repeat(null, 2), - list_repeat(-1, 5), - list_repeat(-3.14, 0), - list_repeat('rust', 4), - list_repeat(null, 0); ----- -[1, 1, 1, 1, 1] [3.14, 3.14, 3.14] [l, l, l, l] [NULL, NULL] [-1, -1, -1, -1, -1] [] [rust, rust, rust, rust] [] - -# array_repeat scalar function #2 (element as list) -query ???? -select - array_repeat([1], 5), - array_repeat([1.1, 2.2, 3.3], 3), - array_repeat([null, null], 3), - array_repeat([[1, 2], [3, 4]], 2); ----- -[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] - -query ???? -select - array_repeat(arrow_cast([1], 'LargeList(Int64)'), 5), - array_repeat(arrow_cast([1.1, 2.2, 3.3], 'LargeList(Float64)'), 3), - array_repeat(arrow_cast([null, null], 'LargeList(Int64)'), 3), - array_repeat(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2); ----- -[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] - -# array_repeat scalar function with count of different integer types -query ???????? 
-Select - array_repeat(1, arrow_cast(2,'Int8')), - array_repeat(2, arrow_cast(2,'Int16')), - array_repeat(3, arrow_cast(2,'Int32')), - array_repeat(4, arrow_cast(2,'Int64')), - array_repeat(1, arrow_cast(2,'UInt8')), - array_repeat(2, arrow_cast(2,'UInt16')), - array_repeat(3, arrow_cast(2,'UInt32')), - array_repeat(4, arrow_cast(2,'UInt64')); ----- -[1, 1] [2, 2] [3, 3] [4, 4] [1, 1] [2, 2] [3, 3] [4, 4] - -# array_repeat scalar function with count of negative integer types -query ???? -Select - array_repeat(1, arrow_cast(-2,'Int8')), - array_repeat(2, arrow_cast(-2,'Int16')), - array_repeat(3, arrow_cast(-2,'Int32')), - array_repeat(4, arrow_cast(-2,'Int64')); ----- -[] [] [] [] - -# array_repeat with columns #1 - -statement ok -CREATE TABLE array_repeat_table -AS VALUES - (1, 1, 1.1, 'a', make_array(4, 5, 6)), - (2, null, null, null, null), - (3, 2, 2.2, 'rust', make_array(7)), - (0, 3, 3.3, 'datafusion', make_array(8, 9)); - -statement ok -CREATE TABLE large_array_repeat_table -AS SELECT - column1, - column2, - column3, - column4, - arrow_cast(column5, 'LargeList(Int64)') as column5 -FROM array_repeat_table; - -query ?????? -select - array_repeat(column2, column1), - array_repeat(column3, column1), - array_repeat(column4, column1), - array_repeat(column5, column1), - array_repeat(column2, 3), - array_repeat(make_array(1), column1) -from array_repeat_table; ----- -[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] -[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] -[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] -[] [] [] [] [3, 3, 3] [] - -query ?????? 
-select - array_repeat(column2, column1), - array_repeat(column3, column1), - array_repeat(column4, column1), - array_repeat(column5, column1), - array_repeat(column2, 3), - array_repeat(make_array(1), column1) -from large_array_repeat_table; ----- -[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] -[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] -[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] -[] [] [] [] [3, 3, 3] [] - -statement ok -drop table array_repeat_table; - -statement ok -drop table large_array_repeat_table; - -# array_repeat: arrays with NULL counts -statement ok -create table array_repeat_null_count_table -as values -(1, 2), -(2, null), -(3, 1), -(4, -1), -(null, null); - -query I? -select column1, array_repeat(column1, column2) from array_repeat_null_count_table; ----- -1 [1, 1] -2 NULL -3 [3] -4 [] -NULL NULL - -statement ok -drop table array_repeat_null_count_table - -# array_repeat: nested arrays with NULL counts -statement ok -create table array_repeat_nested_null_count_table -as values -([[1, 2], [3, 4]], 2), -([[5, 6], [7, 8]], null), -([[null, null], [9, 10]], 1), -(null, 3), -([[11, 12]], -1); - -query ?? -select column1, array_repeat(column1, column2) from array_repeat_nested_null_count_table; ----- -[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] -[[5, 6], [7, 8]] NULL -[[NULL, NULL], [9, 10]] [[[NULL, NULL], [9, 10]]] -NULL [NULL, NULL, NULL] -[[11, 12]] [] - -statement ok -drop table array_repeat_nested_null_count_table - -# array_repeat edge cases: empty arrays -query ??? -select array_repeat([], 3), array_repeat([], 0), array_repeat([], null); ----- -[[], [], []] [] NULL - -query ?? 
-select array_repeat(null::int, 0), array_repeat(null::int, null); ----- -[] NULL - -# array_repeat LargeList with NULL count -statement ok -create table array_repeat_large_list_null_table -as values -(arrow_cast([1, 2, 3], 'LargeList(Int64)'), 2), -(arrow_cast([4, 5], 'LargeList(Int64)'), null), -(arrow_cast(null, 'LargeList(Int64)'), 3); - -query ?? -select column1, array_repeat(column1, column2) from array_repeat_large_list_null_table; ----- -[1, 2, 3] [[1, 2, 3], [1, 2, 3]] -[4, 5] NULL -NULL [NULL, NULL, NULL] - -statement ok -drop table array_repeat_large_list_null_table - -# array_repeat edge cases: LargeList nested with NULL count -statement ok -create table array_repeat_large_nested_null_table -as values -(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2), -(arrow_cast([[5, 6], [7, 8]], 'LargeList(List(Int64))'), null), -(arrow_cast([[null, null]], 'LargeList(List(Int64))'), 1), -(null, 3); - -query ?? -select column1, array_repeat(column1, column2) from array_repeat_large_nested_null_table; ----- -[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] -[[5, 6], [7, 8]] NULL -[[NULL, NULL]] [[[NULL, NULL]]] -NULL [NULL, NULL, NULL] - -statement ok -drop table array_repeat_large_nested_null_table - -## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`) - -# test with empty array -query ? -select array_concat([]); ----- -[] - -# test with NULL array -query ? -select array_concat(NULL::integer[]); ----- -NULL - -# test with multiple NULL arrays -query ? -select array_concat(NULL::integer[], NULL::integer[]); ----- -NULL - -# test with NULL LargeList -query ? -select array_concat(arrow_cast(NULL::string[], 'LargeList(Utf8)')); ----- -NULL - -# test with NULL FixedSizeList -query ? -select array_concat(arrow_cast(NULL::string[], 'FixedSizeList(2, Utf8)')); ----- -NULL - -# test with mix of NULL and empty arrays -query ? -select array_concat(NULL::integer[], []); ----- -[] - -# test with mix of NULL and non-empty arrays -query ? 
-select array_concat(NULL::integer[], [1, 2, 3]); ----- -[1, 2, 3] - -# Concatenating strings arrays -query ? -select array_concat( - ['1', '2'], - ['3'] -); ----- -[1, 2, 3] - -query ? -select array_concat( - arrow_cast(['1', '2'], 'LargeList(Utf8)'), - arrow_cast(['3'], 'LargeList(Utf8)') -); ----- -[1, 2, 3] - -query ? -select array_concat( - arrow_cast(['1', '2'], 'FixedSizeList(2, Utf8)'), - arrow_cast(['3'], 'FixedSizeList(1, Utf8)') -); ----- -[1, 2, 3] - -# Concatenating string arrays -query ? -select array_concat( - [arrow_cast('1', 'LargeUtf8'), arrow_cast('2', 'LargeUtf8')], - [arrow_cast('3', 'LargeUtf8')] -); ----- -[1, 2, 3] - -# Concatenating stringview -query ? -select array_concat( - [arrow_cast('1', 'Utf8View'), arrow_cast('2', 'Utf8View')], - [arrow_cast('3', 'Utf8View')] -); ----- -[1, 2, 3] - -# Concatenating Mixed types -query ? -select array_concat( - [arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], - [arrow_cast('3', 'LargeUtf8')] -); ----- -[1, 2, 3] - -# Concatenating Mixed types -query ?T -select - array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')]), - arrow_typeof(array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')])); ----- -[1, 2, 3] List(Utf8View) - -# array_concat with NULL elements inside arrays -query ? -select array_concat([1, NULL, 3], [NULL, 5]); ----- -[1, NULL, 3, NULL, 5] - -query ? -select array_concat([NULL, NULL], [1, 2], [NULL]); ----- -[NULL, NULL, 1, 2, NULL] - -query ? -select array_concat([NULL, NULL], [NULL, NULL]); ----- -[NULL, NULL, NULL, NULL] - -# array_concat error -query error DataFusion error: Error during planning: Execution error: Function 'array_concat' user-defined coercion failed with: Error during planning: array_concat does not support type Int64 -select array_concat(1, 2); - -# array_concat scalar function #1 -query ?? 
-select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array_concat scalar function #2 -query ? -select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); ----- -[[1, 2], [3, 4], [5, 6], [7, 8]] - -# array_concat scalar function #3 -query ? -select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); ----- -[[1], [2], [3], [4], [5], [6], [7], [8], [9]] - -# array_concat scalar function #4 -query ? -select array_concat(make_array([[1]]), make_array([[2]])); ----- -[[[1]], [[2]]] - -# array_concat scalar function #5 -query ? -select array_concat(make_array(2, 3), make_array()); ----- -[2, 3] - -# array_concat scalar function #6 -query ? -select array_concat(make_array(), make_array(2, 3)); ----- -[2, 3] - -# array_concat scalar function #7 (with empty arrays) -query ? -select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array())); ----- -[[1, 2], [3, 4], []] - -# array_concat scalar function #8 (with empty arrays) -query ? -select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()), make_array(make_array(), make_array()), make_array(make_array(5, 6), make_array(7, 8))); ----- -[[1, 2], [3, 4], [], [], [], [5, 6], [7, 8]] - -# array_concat scalar function #9 (with empty arrays) -query ? -select array_concat(make_array(make_array()), make_array(make_array(1, 2), make_array(3, 4))); ----- -[[], [1, 2], [3, 4]] - -# array_cat scalar function #10 (function alias `array_concat`) -query ?? -select array_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_cat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# list_concat scalar function #11 (function alias `array_concat`) -query ?? 
-select list_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_concat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# list_cat scalar function #12 (function alias `array_concat`) -query ?? -select list_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_cat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array_concat with different dimensions #1 (2D + 1D) -query ? -select array_concat(make_array([1,2], [3,4]), make_array(5, 6)); ----- -[[1, 2], [3, 4], [5, 6]] - -# array_concat with different dimensions #2 (1D + 2D) -query ? -select array_concat(make_array(5, 6), make_array([1,2], [3,4])); ----- -[[5, 6], [1, 2], [3, 4]] - -# array_concat with different dimensions #3 (2D + 1D + 1D) -query ? -select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)); ----- -[[1, 2], [3, 4], [5, 6], [7, 8]] - -# array_concat with different dimensions #4 (1D + 2D + 3D) -query ? -select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])); ----- -[[[10, 20]], [[30, 40]], [[50, 60]]] - -# array_concat with different dimensions #5 (2D + 1D + 3D) -query ? -select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]])); ----- -[[[30, 40]], [[10, 20]], [[50, 60]]] - -# array_concat with different dimensions #6 (2D + 1D + 3D + 4D + 3D) -query ? -select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]]), make_array([[[70, 80]]]), make_array([[80, 40]])); ----- -[[[[30, 40]]], [[[10, 20]]], [[[50, 60]]], [[[70, 80]]], [[[80, 40]]]] - -# array_concat column-wise #1 -query ? 
-select array_concat(column1, make_array(0)) from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0] -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0] -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0] -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 0] - -# array_concat column-wise #2 -query ? -select array_concat(column1, column1) from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 31, 32, 33, 34, 35, 26, 37, 38, 39, 40] - -# array_concat column-wise #3 -query ? -select array_concat(make_array(column2), make_array(column3)) from arrays_values_without_nulls; ----- -[1, 1] -[12, 2] -[23, 3] -[34, 4] - -# array_concat column-wise #4 -query ? -select array_concat(make_array(column2), make_array(0)) from arrays_values; ----- -[1, 0] -[12, 0] -[23, 0] -[34, 0] -[44, 0] -[NULL, 0] -[55, 0] -[66, 0] - -# array_concat column-wise #5 -query ??? -select array_concat(column1, column1), array_concat(column2, column2), array_concat(column3, column3) from arrays; ----- -[[NULL, 2], [3, NULL], [NULL, 2], [3, NULL]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] [L, o, r, e, m, L, o, r, e, m] -[[3, 4], [5, 6], [3, 4], [5, 6]] [NULL, 5.5, 6.6, NULL, 5.5, 6.6] [i, p, NULL, u, m, i, p, NULL, u, m] -[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r] -[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t] -NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t] -[[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,] -[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL - -# array_concat column-wise #6 -query ?? 
-select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)) from arrays; ----- -[[NULL, 2], [3, NULL], [1, 2], [3, 4]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] -[[3, 4], [5, 6], [1, 2], [3, 4]] [NULL, 5.5, 6.6, 1.1, 2.2, 3.3] -[[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3] -[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3] -[[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3] -[[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3] -[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3] - -# array_concat column-wise #7 -query ? -select array_concat(column3, make_array('.', '.', '.')) from arrays; ----- -[L, o, r, e, m, ., ., .] -[i, p, NULL, u, m, ., ., .] -[d, NULL, l, o, r, ., ., .] -[s, i, t, ., ., .] -[a, m, e, t, ., ., .] -[,, ., ., .] -[., ., .] - -# query ??I? -# select column1, column2, column3, column4 from arrays_values_v2; -# ---- -# [NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] -# NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] -# [9, NULL, 10] NULL 14 [[70, NULL, NULL]] -# [NULL, 1] [NULL, 21] NULL NULL -# [11, 12] NULL NULL NULL -# NULL NULL NULL NULL - - -# array_concat column-wise #8 (1D + 1D) -query ? -select array_concat(column1, column2) from arrays_values_v2; ----- -[NULL, 2, 3, 4, 5, NULL] -[7, NULL, 8] -[9, NULL, 10] -[NULL, 1, NULL, 21] -[11, 12] -NULL - -# array_concat column-wise #9 (2D + 1D) -query ? -select array_concat(column4, make_array(column3)) from arrays_values_v2; ----- -[[30, 40, 50], [12]] -[[NULL, NULL, 60], [13]] -[[70, NULL, NULL], [14]] -[[NULL]] -[[NULL]] -[[NULL]] - -# array_concat column-wise #10 (3D + 2D + 1D) -query ? 
-select array_concat(column4, column1, column2) from nested_arrays; ----- -[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]], [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]], [[7, 8, 9]]] -[[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]], [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]], [[10, 11, 12]]] - -# array_concat column-wise #11 (2D + 1D) -query ? -select array_concat(column4, column1) from arrays_values_v2; ----- -[[30, 40, 50], [NULL, 2, 3]] -[[NULL, NULL, 60], NULL] -[[70, NULL, NULL], [9, NULL, 10]] -[[NULL, 1]] -[[11, 12]] -[NULL] - -# array_concat column-wise #12 (1D + 1D + 1D) -query ? -select array_concat(make_array(column3), column1, column2) from arrays_values_v2; ----- -[12, NULL, 2, 3, 4, 5, NULL] -[13, 7, NULL, 8] -[14, 9, NULL, 10] -[NULL, NULL, 1, NULL, 21] -[NULL, 11, 12] -[NULL] - -## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) - -## array_position with NULL (follow PostgreSQL) -query II -select array_position([1, 2, 3, 4, 5], arrow_cast(NULL, 'Int64')), array_position(arrow_cast(NULL, 'List(Int64)'), 1); ----- -NULL NULL - -# array_position with no match (incl. 
empty array) returns NULL -query II -select array_position([], 1), array_position([2], 1); ----- -NULL NULL - -# array_position scalar function #1 -query III -select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1); ----- -3 5 1 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); ----- -3 5 1 - -# array_position scalar function #2 (with optional argument) -query III -select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 5, 4, 5], 5, 4), array_position([1, 1, 1], 1, 2); ----- -4 5 2 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1, 2); ----- -4 5 2 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1, 2); ----- -4 5 2 - -# array_position scalar function #3 (element is list) -query II -select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ----- -2 2 - -# array_position scalar function #4 (element in list; with optional argument) -query II -select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 
[4, 5, 6], 3), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 3); ----- -4 3 - -query II -select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), array_position(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); ----- -2 2 - -query I -SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5) ----- -1 - -query I -SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2) ----- -5 - -query I -SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) ----- -3 - -query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from -SELECT array_position([1, 2, 3], 'foo') - -query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from -SELECT array_position([1, 2, 3], 'foo', 2) - -# list_position scalar function #5 (function alias `array_position`) -query III -select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); ----- -3 5 1 - -query III -select list_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -# array_indexof scalar function #6 (function alias `array_position`) -query III -select array_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), array_indexof([1, 2, 3, 4, 5], 5), array_indexof([1, 1, 1], 1); ----- -3 5 1 - -query III -select array_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -# 
list_indexof scalar function #7 (function alias `array_position`) -query III -select list_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), list_indexof([1, 2, 3, 4, 5], 5), list_indexof([1, 1, 1], 1); ----- -3 5 1 - -query III -select list_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -# array_position with columns #1 -query II -select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; ----- -1 1 -2 2 -3 3 -4 4 - -query II -select array_position(column1, column2), array_position(column1, column2, column3) from large_arrays_values_without_nulls; ----- -1 1 -2 2 -3 3 -4 4 - -# array_position with columns #2 (element is list) -query II -select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; ----- -3 3 -2 5 - -query II -select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; ----- -3 3 -2 5 - -# array_position with columns and scalars #1 -query III -select array_position(make_array(1, 2, 3, 4, 5), column2), array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; ----- -1 3 NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL - -query III -select array_position(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_position(column1, 3), array_position(column1, 3, 5) from large_arrays_values_without_nulls; ----- -1 3 NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL - -# array_position with columns and scalars #2 (element is list) -query III -select array_position(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), column2), array_position(column1, make_array(4, 5, 6)), array_position(column1, make_array(1, 2, 3), 2) from nested_arrays; ----- -NULL 6 4 -NULL 1 NULL - -query III -select 
array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), 'LargeList(LargeList(Int64))'), column2), array_position(column1, arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)')), array_position(column1, arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2) from large_nested_arrays; ----- -NULL 6 4 -NULL 1 NULL - -# array_position with NULL element in haystack array (NULL = NULL semantics) -query III -select array_position([1, NULL, 3], arrow_cast(NULL, 'Int64')), array_position([NULL, 2, 3], arrow_cast(NULL, 'Int64')), array_position([1, 2, NULL], arrow_cast(NULL, 'Int64')); ----- -2 1 3 - -query I -select array_position(arrow_cast([1, NULL, 3], 'LargeList(Int64)'), arrow_cast(NULL, 'Int64')); ----- -2 - -# array_position with NULL element in array and start_from -query II -select array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 2), array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 1); ----- -3 1 - -# array_position with column array and scalar element -query IIII -select array_position(column1, 3), array_position(column1, 10), array_position(column1, 20), array_position(column1, 999) from arrays_values_without_nulls; ----- -3 10 NULL NULL -NULL NULL 10 NULL -NULL NULL NULL NULL -NULL NULL NULL NULL - -query II -select array_position(column1, 3), array_position(column1, 20) from large_arrays_values_without_nulls; ----- -3 NULL -NULL 10 -NULL NULL -NULL NULL - -query II -select array_position(column1, 3), array_position(column1, 20) from fixed_size_arrays_values_without_nulls; ----- -3 NULL -NULL 10 -NULL NULL -NULL NULL - -# array_position with column array, scalar element, and scalar start_from -query II -select array_position(column1, 3, 1), array_position(column1, 3, 4) from arrays_values_without_nulls; ----- -3 NULL -NULL NULL -NULL NULL -NULL NULL - -query II -select array_position(column1, 3, 1), array_position(column1, 3, 4) from large_arrays_values_without_nulls; ----- -3 NULL -NULL NULL -NULL NULL -NULL NULL - -# 
array_position with column array, scalar element, and column start_from -query I -select array_position(column1, 3, column3) from arrays_values_without_nulls; ----- -3 -NULL -NULL -NULL - -# array_position with scalar haystack, scalar element, and column start_from -query I -select array_position([1, 2, 1, 2], 2, column3) from arrays_values_without_nulls; ----- -2 -2 -4 -4 - -# array_position start_from boundary cases -query IIII -select array_position([1, 2, 3], 3, 3), array_position([1, 2, 3], 1, 2), array_position([1, 2, 3], 1, 1), array_position([1, 2, 3], 3, 4); ----- -3 NULL 1 NULL - -query II -select array_position([1, 2, 3], 3, 4), array_position([1], 1, 2); ----- -NULL NULL - -# array_position with empty array in various contexts -query II -select array_position(arrow_cast(make_array(), 'List(Int64)'), 1), array_position(arrow_cast(make_array(), 'LargeList(Int64)'), 1); ----- -NULL NULL - -# FixedSizeList with start_from -query II -select array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 1, 2), array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 2, 4); ----- -4 5 - -query I -select array_position(arrow_cast(['a', 'b', 'c', 'b'], 'FixedSizeList(4, Utf8)'), 'b', 3); ----- -4 - -## array_positions (aliases: `list_positions`) - -# array_positions with empty array -query ? -select array_positions(arrow_cast(make_array(), 'List(Int64)'), 1); ----- -[] - -query ? -select array_positions([1, 2, 3, 4, 5], null); ----- -[] - -#TODO: https://github.com/apache/datafusion/issues/7142 -# array_positions with NULL (follow PostgreSQL) -#query ? -#select array_positions(null, 1); -#---- -#NULL - -# array_positions scalar function #1 -query ??? -select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? 
-select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? -select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -# array_positions scalar function #2 (element is list) -query ? -select array_positions(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), [2, 1, 3]); ----- -[2, 4] - -query ? -select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'LargeList(List(Int64))'), [2, 1, 3]); ----- -[2, 4] - -query ? -select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'FixedSizeList(5, List(Int64))'), [2, 1, 3]); ----- -[2, 4] - -# list_positions scalar function #3 (function alias `array_positions`) -query ??? -select list_positions(['h', 'e', 'l', 'l', 'o'], 'l'), list_positions([1, 2, 3, 4, 5], 5), list_positions([1, 1, 1], 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? -select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? -select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), - list_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), - list_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -# array_positions with columns #1 -query ? -select array_positions(column1, column2) from arrays_values_without_nulls; ----- -[1] -[2] -[3] -[4] - -query ? 
-select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from arrays_values_without_nulls; ----- -[1] -[2] -[3] -[4] - -query ? -select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from fixed_size_arrays_values_without_nulls; ----- -[1] -[2] -[3] -[4] - -# array_positions with columns #2 (element is list) -query ? -select array_positions(column1, column2) from nested_arrays; ----- -[3] -[2, 5] - -query ? -select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from nested_arrays; ----- -[3] -[2, 5] - -query ? -select array_positions(column1, column2) from fixed_size_nested_arrays; ----- -[3] -[2, 5] - -# array_positions with columns and scalars #1 -query ?? -select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; ----- -[4] [1] -[] [] -[] [3] -[] [] - -query ?? -select array_positions(arrow_cast(column1, 'LargeList(Int64)'), 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; ----- -[4] [1] -[] [] -[] [3] -[] [] - -query ?? -select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from fixed_size_arrays_values_without_nulls; ----- -[4] [1] -[] [] -[] [3] -[] [] - -# array_positions with columns and scalars #2 (element is list) -query ?? -select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from nested_arrays; ----- -[6] [] -[1] [] - -query ?? -select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(4, 5, 6)), array_positions(arrow_cast(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), 'LargeList(List(Int64))'), column2) from nested_arrays; ----- -[6] [] -[1] [] - -query ?? 
-select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from fixed_size_nested_arrays; ----- -[6] [] -[1] [] - -## array_replace (aliases: `list_replace`) - -# array_replace scalar function #1 -query ??? -select - array_replace(make_array(1, 2, 3, 4), 2, 3), - array_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - array_replace(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - array_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), - array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), - array_replace(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -# array_replace scalar function #2 (element is list) -query ?? -select - array_replace( - make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), - [4, 5, 6], - [1, 1, 1] - ), - array_replace( - make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select - array_replace( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select - array_replace( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -# list_replace scalar function #3 (function alias `list_replace`) -query ??? -select list_replace( - make_array(1, 2, 3, 4), 2, 3), - list_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - list_replace(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select list_replace( - arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - list_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - list_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -# array_replace scalar function #4 (null input) -query ? -select array_replace(make_array(1, 2, 3, 4, 5), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -query ? -select array_replace(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -# array_replace scalar function with columns #1 -query ? 
-select array_replace(column1, column2, column3) from arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ? -select array_replace(column1, column2, column3) from large_arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_replace scalar function with columns #2 (element is list) -query ? -select array_replace(column1, column2, column3) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ? -select array_replace(column1, column2, column3) from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -# array_replace scalar function with columns and scalars #1 -query ??? 
-select - array_replace(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), - array_replace(column1, 1, column3), - array_replace(column1, column2, 4) -from arrays_with_repeating_elements; ----- -[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ??? -select - array_replace(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), - array_replace(column1, 1, column3), - array_replace(column1, column2, 4) -from large_arrays_with_repeating_elements; ----- -[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_replace scalar function with columns and scalars #2 (element is list) -query ??? 
-select - array_replace( - make_array( - [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), - column2, - column3 - ), - array_replace(column1, make_array(1, 2, 3), column3), - array_replace(column1, column2, make_array(11, 12, 13)) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ??? -select - array_replace( - arrow_cast(make_array( - [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]),'LargeList(List(Int64))'), - column2, - column3 - ), - array_replace(column1, make_array(1, 2, 3), column3), - array_replace(column1, column2, make_array(11, 12, 13)) -from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 
24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_replace_n (aliases: `list_replace_n`) - -# array_replace_n scalar function #1 -query ??? -select - array_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), - array_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), - array_replace_n(make_array(1, 2, 3), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), - array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), - array_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3, 2), - array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0, 2), - array_replace_n(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -# array_replace_n scalar function #2 (element is list) -query ?? -select - array_replace_n( - make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), - [4, 5, 6], - [1, 1, 1], - 2 - ), - array_replace_n( - make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), - [2, 3, 4], - [3, 1, 4], - 2 - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select - array_replace_n( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), - [4, 5, 6], - [1, 1, 1], - 2 - ), - array_replace_n( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), - [2, 3, 4], - [3, 1, 4], - 2 - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select - array_replace_n( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [4, 5, 6], - [1, 1, 1], - 2 - ), - array_replace_n( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [2, 3, 4], - [3, 1, 4], - 2 - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -# list_replace_n scalar function #3 (function alias `array_replace_n`) -query ??? -select - list_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), - list_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), - list_replace_n(make_array(1, 2, 3), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - list_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), - list_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), - list_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -# array_replace_n scalar function #4 (null input) -query ? -select array_replace_n(make_array(1, 2, 3, 4, 5), NULL, NULL, NULL); ----- -[1, 2, 3, 4, 5] - -query ? -select array_replace_n(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL, NULL); ----- -[1, 2, 3, 4, 5] - -# array_replace_n scalar function with columns #1 -query ? 
-select - array_replace_n(column1, column2, column3, column4) -from arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -query ? -select - array_replace_n(column1, column2, column3, column4) -from large_arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -# array_replace_n scalar function with columns #2 (element is list) -query ? -select - array_replace_n(column1, column2, column3, column4) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - -query ? 
-select - array_replace_n(column1, column2, column3, column4) -from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - - -# array_replace_n scalar function with columns and scalars #1 -query ???? -select - array_replace_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3, column4), - array_replace_n(column1, 1, column3, column4), - array_replace_n(column1, column2, 4, column4), - array_replace_n(column1, column2, column3, 2) -from arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] - -query ???? 
-select - array_replace_n(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3, column4), - array_replace_n(column1, 1, column3, column4), - array_replace_n(column1, column2, 4, column4), - array_replace_n(column1, column2, column3, 2) -from large_arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] - -# array_replace_n scalar function with columns and scalars #2 (element is list) -query ???? -select - array_replace_n( - make_array( - [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), - column2, - column3, - column4 - ), - array_replace_n(column1, make_array(1, 2, 3), column3, column4), - array_replace_n(column1, column2, make_array(11, 12, 13), column4), - array_replace_n(column1, column2, column3, 2) -from nested_arrays_with_repeating_elements; ----- -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 
15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ???? 
-select - array_replace_n( - arrow_cast(make_array( - [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), 'LargeList(List(Int64))'), - column2, - column3, - column4 - ), - array_replace_n(column1, make_array(1, 2, 3), column3, column4), - array_replace_n(column1, column2, make_array(11, 12, 13), column4), - array_replace_n(column1, column2, column3, 2) -from large_nested_arrays_with_repeating_elements; ----- -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_replace_all (aliases: `list_replace_all`) - -# array_replace_all scalar function #1 -query ??? -select - array_replace_all(make_array(1, 2, 3, 4), 2, 3), - array_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - array_replace_all(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - array_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), - array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), - array_replace_all(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -# array_replace_all scalar function #2 (element is list) -query ?? -select - array_replace_all( - make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), - [4, 5, 6], - [1, 1, 1] - ), - array_replace_all( - make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select - array_replace_all( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace_all( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select - array_replace_all( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace_all( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -# list_replace_all scalar function #3 (function alias `array_replace_all`) -query ??? -select - list_replace_all(make_array(1, 2, 3, 4), 2, 3), - list_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - list_replace_all(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -query ??? -select - list_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - list_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - list_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -# array_replace_all scalar function #4 (null input) -query ? -select array_replace_all(make_array(1, 2, 3, 4, 5), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -query ? -select array_replace_all(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -# array_replace_all scalar function with columns #1 -query ? 
-select - array_replace_all(column1, column2, column3) -from arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] -[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -query ? -select - array_replace_all(column1, column2, column3) -from large_arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] -[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -# array_replace_all scalar function with columns #2 (element is list) -query ? -select - array_replace_all(column1, column2, column3) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - -query ? -select - array_replace_all(column1, column2, column3) -from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - -# array_replace_all scalar function with columns and scalars #1 -query ??? 
-select - array_replace_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), - array_replace_all(column1, 1, column3), - array_replace_all(column1, column2, 4) -from arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] - -query ??? -select - array_replace_all(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), - array_replace_all(column1, 1, column3), - array_replace_all(column1, column2, 4) -from large_arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] - -# array_replace_all scalar function with columns and scalars #2 (element is list) -query ??? 
-select - array_replace_all( - make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), - column2, - column3 - ), - array_replace_all(column1, make_array(1, 2, 3), column3), - array_replace_all(column1, column2, make_array(11, 12, 13)) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] 
[[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] - -query ??? -select - array_replace_all( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), 'LargeList(List(Int64))'), - column2, - column3 - ), - array_replace_all(column1, make_array(1, 2, 3), column3), - array_replace_all(column1, column2, make_array(11, 12, 13)) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 
20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] - -# array_replace with null handling - -statement ok -create table t as values - (make_array(3, 1, NULL, 3), 3, 4, 2), - (make_array(3, 1, NULL, 3), NULL, 5, 2), - (NULL, 3, 2, 1), - (make_array(3, 1, 3), 3, NULL, 1) -; - - -# ([3, 1, NULL, 3], 3, 4, 2) => [4, 1, NULL, 4] NULL not matched -# ([3, 1, NULL, 3], NULL, 5, 2) => [3, 1, NULL, 3] NULL is replaced with 5 -# ([NULL], 3, 2, 1) => NULL -# ([3, 1, 3], 3, NULL, 1) => [NULL, 1 3] - -query ?III? -select column1, column2, column3, column4, array_replace_n(column1, column2, column3, column4) from t; ----- -[3, 1, NULL, 3] 3 4 2 [4, 1, NULL, 4] -[3, 1, NULL, 3] NULL 5 2 [3, 1, 5, 3] -NULL 3 2 1 NULL -[3, 1, 3] 3 NULL 1 [NULL, 1, 3] - - - -statement ok -drop table t; - - - -## array_to_string (aliases: `list_to_string`, `array_join`, `list_join`) - -# array_to_string scalar function #1 -query TTT -select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string scalar function #2 -query TTT -select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_repeat(array_repeat(array_repeat(3, 2), 2), 3), '/\'); ----- -11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 - -# array_to_string scalar function #3 -query T -select array_to_string(make_array(), ',') ----- -(empty) - -# array to string dictionary -statement ok -CREATE TABLE table1 AS VALUES - (1, 'foo'), - (3, 'bar'), - (1, 'foo'), - (2, NULL), - (NULL, 'baz') - ; - -# expect 1-3-1-2 (dictionary values should be repeated) -query T -SELECT 
array_to_string(array_agg(column1),'-') -FROM ( - SELECT arrow_cast(column1, 'Dictionary(Int32, Int32)') as column1 - FROM table1 -); ----- -1-3-1-2 - -# expect foo,bar,foo,baz (dictionary values should be repeated) -query T -SELECT array_to_string(array_agg(column2),',') -FROM ( - SELECT arrow_cast(column2, 'Dictionary(Int64, Utf8)') as column2 - FROM table1 -); ----- -foo,bar,foo,baz - -# Expect only values that are in the group -query I?T -SELECT column1, array_agg(column2), array_to_string(array_agg(column2),',') -FROM ( - SELECT column1, arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2 - FROM table1 -) -GROUP BY column1 -ORDER BY column1; ----- -1 [foo, foo] foo,foo -2 [NULL] (empty) -3 [bar] bar -NULL [baz] baz - -# verify make_array does force to Utf8View -query T -SELECT arrow_typeof(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd')); ----- -List(Utf8View) - -# expect a,b,c,d. make_array forces all types to be of a common type (see above) -query T -SELECT array_to_string(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd'), ','); ----- -a,b,c,d - -# array_to_string using largeutf8 for second arg -query TTT -select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'LargeUtf8')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'LargeUtf8')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'LargeUtf8')); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string using utf8view for second arg -query TTT -select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'Utf8View')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'Utf8View')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'Utf8View')); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -statement ok -drop table table1; - - -## array_union (aliases: `list_union`) - -# array_union scalar function #1 -query ? -select array_union([1, 2, 3, 4], [5, 6, 3, 4]); ----- -[1, 2, 3, 4, 5, 6] - -query ? 
-select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); ----- -[1, 2, 3, 4, 5, 6] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); ----- -[1, 2, 3, 4, 5, 6] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6], 'FixedSizeList(2, Int64)')); ----- -[1, 2, 3, 4, 5, 6] - -# array_union scalar function #2 -query ? -select array_union([1, 2, 3, 4], [5, 6, 7, 8]); ----- -[1, 2, 3, 4, 5, 6, 7, 8] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 7, 8], 'LargeList(Int64)')); ----- -[1, 2, 3, 4, 5, 6, 7, 8] - -# array_union scalar function #3 -query ? -select array_union([1,2,3], []); ----- -[1, 2, 3] - -query ? -select array_union(arrow_cast([1,2,3], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); ----- -[1, 2, 3] - -# array_union scalar function #4 -query ? -select array_union([1, 2, 3, 4], [5, 4]); ----- -[1, 2, 3, 4, 5] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 4], 'LargeList(Int64)')); ----- -[1, 2, 3, 4, 5] - -# array_union scalar function #5 -statement ok -CREATE TABLE arrays_with_repeating_elements_for_union -AS VALUES - ([0, 1, 1], []), - ([1, 1], [2]), - ([2, 3], [3]), - ([3], [3, 4]) -; - -query ? -select array_union(column1, column2) from arrays_with_repeating_elements_for_union; ----- -[0, 1] -[1, 2] -[2, 3] -[3, 4] - -query ? -select array_union(arrow_cast(column1, 'LargeList(Int64)'), arrow_cast(column2, 'LargeList(Int64)')) from arrays_with_repeating_elements_for_union; ----- -[0, 1] -[1, 2] -[2, 3] -[3, 4] - -statement ok -drop table arrays_with_repeating_elements_for_union; - -# array_union scalar function #6 -query ? -select array_union([], []); ----- -[] - -query ? 
-select array_union(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); ----- -[] - -# array_union scalar function #7 -# re-enable when https://github.com/apache/arrow-rs/issues/9227 is fixed -# query ? -# select array_union([[null]], []); -# ---- -# [[]] - -query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_union' function: -select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([], 'LargeList(Int64)')); - -# array_union scalar function #8 -query ? -select array_union([null], [null]); ----- -[NULL] - -query ? -select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([[null]], 'LargeList(List(Int64))')); ----- -[[NULL]] - -# array_union scalar function #9 -query ? -select array_union(null, []); ----- -NULL - -query ? -select array_union(null, arrow_cast([], 'LargeList(Int64)')); ----- -NULL - -# array_union scalar function #10 -query ? -select array_union(null, null); ----- -NULL - -# array_union scalar function #11 -query ? -select array_union([1, 1, 2, 2, 3, 3], null); ----- -NULL - -query ? -select array_union(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); ----- -NULL - -# array_union scalar function #12 -query ? -select array_union(null, [1, 1, 2, 2, 3, 3]); ----- -NULL - -query ? -select array_union(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); ----- -NULL - -# array_union scalar function #13 -query ? -select array_union([1.2, 3.0], [1.2, 3.0, 5.7]); ----- -[1.2, 3.0, 5.7] - -query ? -select array_union(arrow_cast([1.2, 3.0], 'LargeList(Float64)'), arrow_cast([1.2, 3.0, 5.7], 'LargeList(Float64)')); ----- -[1.2, 3.0, 5.7] - -# array_union scalar function #14 -query ? -select array_union(['hello'], ['hello','datafusion']); ----- -[hello, datafusion] - -query ? -select array_union(arrow_cast(['hello'], 'LargeList(Utf8)'), arrow_cast(['hello','datafusion'], 'LargeList(Utf8)')); ----- -[hello, datafusion] - -query ? 
-select array_union(column1, column2) -from array_intersect_table_1D_NULL; ----- -[1, 2, 3, 4] -[2, 3] -[3, 4] -NULL -NULL -NULL - -query ? -select array_union(arrow_cast(null, 'List(Int64)'), [1, 2]); ----- -NULL - -query ? -select array_union([1, 2], arrow_cast(null, 'List(Int64)')); ----- -NULL - -query ? -select array_intersect(arrow_cast(null, 'List(Int64)'), [1, 2]); ----- -NULL - -query ? -select array_intersect([1, 2], arrow_cast(null, 'List(Int64)')); ----- -NULL - -query ? -select array_except(arrow_cast(null, 'List(Int64)'), [1, 2]); ----- -NULL - -query ? -select array_except([1, 2], arrow_cast(null, 'List(Int64)')); ----- -NULL - -# list_to_string scalar function #4 (function alias `array_to_string`) -query TTT -select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3, 4, 5], '-'), list_to_string([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_join scalar function #5 (function alias `array_to_string`) -query TTT -select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# list_join scalar function #6 (function alias `list_join`) -query TTT -select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), 
list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string scalar function with nulls #1 -query TTT -select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); ----- -h,l,o 1-3-5 2|3 - -query TTT -select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string scalar function with nulls #2 -query TTT -select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); ----- -h,-,-,-,o nil-2-nil-4-5 1|0|3 - -query TTT -select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0'); ----- -h,-,-,-,o nil-2-nil-4-5 1|0|3 - -query TTT -select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0'); ----- -h,-,-,-,o nil-2-nil-4-5 1|0|3 - -# 
array_to_string float formatting: special values and longer decimals -query TTT -select - array_to_string(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), '|'), - array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'LargeList(Float64)'), '|'), - array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'FixedSizeList(5, Float64)'), '|'); ----- -NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 - -# array_to_string float formatting: scientific-notation inputs -query T -select array_to_string( - make_array( - CAST('1E20' AS DOUBLE), - CAST('-1e+20' AS DOUBLE), - CAST('6.02214076e23' AS DOUBLE), - CAST('1.2345e6' AS DOUBLE), - CAST('1e-5' AS DOUBLE), - CAST('-1e-5' AS DOUBLE), - CAST('9.1093837015e-31' AS DOUBLE), - CAST('-2.5e-4' AS DOUBLE) - ), - '|' -); ----- -100000000000000000000|-100000000000000000000|602214076000000000000000|1234500|0.00001|-0.00001|0.00000000000000000000000000000091093837015|-0.00025 - -query T -select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); ----- --,a,- - -# array_to_string with columns #1 - -# For reference -# select column1, column4 from arrays_values; -# ---- -# [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] , -# [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] . 
-# [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] - -# [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] ok -# NULL @ -# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ -# [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] ^ -# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL - -query T -select array_to_string(column1, column4) from arrays_values; ----- -2,3,4,5,6,7,8,9,10 -11.12.13.14.15.16.17.18.20 -21-22-23-25-26-27-28-29-30 -31ok32ok33ok34ok35ok37ok38ok39ok40 -NULL -41$42$43$44$45$46$47$48$49$50 -51^52^54^55^56^57^58^59^60 -NULL - -query T -select array_to_string(column1, column4) from large_arrays_values; ----- -2,3,4,5,6,7,8,9,10 -11.12.13.14.15.16.17.18.20 -21-22-23-25-26-27-28-29-30 -31ok32ok33ok34ok35ok37ok38ok39ok40 -NULL -41$42$43$44$45$46$47$48$49$50 -51^52^54^55^56^57^58^59^60 -NULL - -query TT -select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values; ----- -2_3_4_5_6_7_8_9_10 1/2/3 -11_12_13_14_15_16_17_18_20 1/2/3 -21_22_23_25_26_27_28_29_30 1/2/3 -31_32_33_34_35_37_38_39_40 1/2/3 -NULL 1/2/3 -41_42_43_44_45_46_47_48_49_50 1/2/3 -51_52_54_55_56_57_58_59_60 1/2/3 -61_62_63_64_65_66_67_68_69_70 1/2/3 - -query TT -select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values; ----- -2_3_4_5_6_7_8_9_10 1/2/3 -11_12_13_14_15_16_17_18_20 1/2/3 -21_22_23_25_26_27_28_29_30 1/2/3 -31_32_33_34_35_37_38_39_40 1/2/3 -NULL 1/2/3 -41_42_43_44_45_46_47_48_49_50 1/2/3 -51_52_54_55_56_57_58_59_60 1/2/3 -61_62_63_64_65_66_67_68_69_70 1/2/3 - -query TT -select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values; ----- -*_2_3_4_5_6_7_8_9_10 1.2.3 -11_12_13_14_15_16_17_18_*_20 1.2.3 -21_22_23_*_25_26_27_28_29_30 1.2.3 -31_32_33_34_35_*_37_38_39_40 1.2.3 -NULL 1.2.3 -41_42_43_44_45_46_47_48_49_50 1.2.3 -51_52_*_54_55_56_57_58_59_60 1.2.3 -61_62_63_64_65_66_67_68_69_70 1.2.3 - -query TT -select array_to_string(column1, '_', '*'), 
array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values; ----- -*_2_3_4_5_6_7_8_9_10 1.2.3 -11_12_13_14_15_16_17_18_*_20 1.2.3 -21_22_23_*_25_26_27_28_29_30 1.2.3 -31_32_33_34_35_*_37_38_39_40 1.2.3 -NULL 1.2.3 -41_42_43_44_45_46_47_48_49_50 1.2.3 -51_52_*_54_55_56_57_58_59_60 1.2.3 -61_62_63_64_65_66_67_68_69_70 1.2.3 - -# array_to_string with per-row null_string column -statement ok -CREATE TABLE test_null_str_col AS VALUES - (make_array(1, NULL, 3), ',', 'N/A'), - (make_array(NULL, 5, NULL), ',', 'MISSING'), - (make_array(10, NULL, 12), '-', 'X'), - (make_array(20, NULL, 21), '-', NULL); - -query T -SELECT array_to_string(column1, column2, column3) FROM test_null_str_col; ----- -1,N/A,3 -MISSING,5,MISSING -10-X-12 -20-21 - -statement ok -DROP TABLE test_null_str_col; - -# array_to_string with decimal values -query T -select array_to_string(arrow_cast(make_array(1.5, NULL, 3.14), 'List(Decimal128(10, 2))'), ',', 'N'); ----- -1.50,N,3.14 - -# array_to_string with date values -query T -select array_to_string(arrow_cast(make_array('2024-01-15', '2024-06-30', '2024-12-25'), 'List(Date32)'), ','); ----- -2024-01-15,2024-06-30,2024-12-25 - -query T -select array_to_string(arrow_cast(make_array('2024-01-15', NULL, '2024-12-25'), 'List(Date32)'), ',', 'N'); ----- -2024-01-15,N,2024-12-25 - -# array_to_string with timestamp values -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Second, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Second, None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Millisecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Millisecond, None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Microsecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Microsecond, 
None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Nanosecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Nanosecond, None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -# array_to_string with time values -query T -select array_to_string(make_array(arrow_cast('10:30:00', 'Time32(Second)'), arrow_cast('15:45:00', 'Time32(Second)')), ','); ----- -10:30:00,15:45:00 - -query T -select array_to_string(make_array(arrow_cast('10:30:00', 'Time64(Microsecond)'), arrow_cast('15:45:00', 'Time64(Microsecond)')), ','); ----- -10:30:00,15:45:00 - -# array_to_string with interval values -query T -select array_to_string(make_array(interval '1 year 2 months', interval '3 days 4 hours'), ','); ----- -14 mons,3 days 4 hours - -# array_to_string with duration values -query T -select array_to_string(make_array(arrow_cast(1000, 'Duration(Millisecond)'), arrow_cast(2000, 'Duration(Millisecond)')), ','); ----- -PT1S,PT2S - - -## cardinality - -# cardinality scalar function -query III -select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o')); ----- -5 3 5 - -query III -select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), cardinality(arrow_cast([1, 3, 5], 'LargeList(Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -5 3 5 - -query III -select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); ----- -5 3 5 - -# cardinality scalar function #2 -query II -select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); ----- -6 18 - -query I -select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 
'LargeList(List(Int64))')); ----- -6 - -query I -select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); ----- -6 - -# cardinality scalar function #3 -query II -select cardinality(make_array()), cardinality(make_array(make_array())) ----- -0 0 - -query II -select cardinality([]), cardinality([]::int[]) as with_cast ----- -0 0 - -query II -select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -0 0 - -#TODO -#https://github.com/apache/datafusion/issues/9158 -#query II -#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Int64))')) -#---- -#NULL 0 - -# cardinality of NULL arrays should return NULL -query II -select cardinality(NULL), cardinality(arrow_cast(NULL, 'LargeList(Int64)')) ----- -NULL NULL - -# cardinality with columns -query III -select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; ----- -4 3 5 -4 3 5 -4 3 5 -4 3 3 -NULL 3 4 -4 NULL 1 -4 3 NULL - -query III -select cardinality(column1), cardinality(column2), cardinality(column3) from large_arrays; ----- -4 3 5 -4 3 5 -4 3 5 -4 3 3 -NULL 3 4 -4 NULL 1 -4 3 NULL - -query III -select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; ----- -4 3 5 -4 3 5 -4 3 5 -4 3 5 -NULL 3 5 -4 NULL 5 -4 3 NULL - -## array_remove (aliases: `list_remove`) - -# array_remove scalar function #1 -query ??? -select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ??? 
-select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), - array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), - array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ??? -select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), - array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), - array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ??? -select - array_remove(make_array(1, null, 2, 3), 2), - array_remove(make_array(1.1, null, 2.2, 3.3), 1.1), - array_remove(make_array('a', null, 'bc'), 'a'); ----- -[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] - -query ??? -select - array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), - array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), - array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); ----- -[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] - -query ??? -select - array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), - array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'FixedSizeList(4, Float64)'), 1.1), - array_remove(arrow_cast(make_array('a', null, 'bc'), 'FixedSizeList(3, Utf8)'), 'a'); ----- -[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] - -#TODO: https://github.com/apache/datafusion/issues/7142 -# follow PostgreSQL behavior -#query ? -#select -# array_remove(NULL, 1) -#---- -#NULL - -query ?? -select - array_remove(make_array(1, null, 2), null), - array_remove(make_array(1, null, 2, null), null); ----- -NULL NULL - -query ?? 
-select - array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), - array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); ----- -NULL NULL - -query ?? -select - array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), - array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); ----- -NULL NULL - -# array_remove with null element from column -query ? -select array_remove(column1, column2) from (values - (make_array(1, 2, 3), 2), - (make_array(4, 5, 6), null), - (make_array(7, 8, 9), 8), - (null, 1) -) as t(column1, column2); ----- -[1, 3] -NULL -[7, 9] -NULL - -# array_remove with null element from column (LargeList) -query ? -select array_remove(column1, column2) from (values - (arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), - (arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)'), null), - (arrow_cast(make_array(7, 8, 9), 'LargeList(Int64)'), 8) -) as t(column1, column2); ----- -[1, 3] -NULL -[7, 9] - -# array_remove scalar function #2 (element is list) -query ?? -select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), - array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), - array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -# list_remove scalar function #3 (function alias `array_remove`) -query ??? -select list_remove(make_array(1, 2, 2, 1, 1), 2), list_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ?? -select list_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - list_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -# array_remove scalar function with columns #1 -query ? -select array_remove(column1, column2) from arrays_with_repeating_elements; ----- -[1, 1, 3, 2, 2, 1, 3, 2, 3] -[4, 5, 5, 6, 5, 5, 5, 4, 4] -[7, 7, 8, 7, 9, 7, 8, 7, 7] -[11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ? -select array_remove(column1, column2) from large_arrays_with_repeating_elements; ----- -[1, 1, 3, 2, 2, 1, 3, 2, 3] -[4, 5, 5, 6, 5, 5, 5, 4, 4] -[7, 7, 8, 7, 9, 7, 8, 7, 7] -[11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ? 
-select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; ----- -[1, 1, 3, 2, 2, 1, 3, 2, 3] -[4, 5, 5, 6, 5, 5, 5, 4, 4] -[7, 7, 8, 7, 9, 7, 8, 7, 7] -[11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_remove scalar function with columns #2 (element is list) -query ? -select array_remove(column1, column2) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ? -select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ? 
-select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -# array_remove scalar function with columns and scalars #1 -query ?? -select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from arrays_with_repeating_elements; ----- -[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ?? -select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; ----- -[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ?? 
-select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; ----- -[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_remove scalar function with columns and scalars #2 (element is list) -query ?? -select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ?? 
-select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ?? 
-select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_remove_n (aliases: `list_remove_n`) - -# array_remove_n with null element scalar -query ?? -select array_remove_n(make_array(1, 2, 2, 1, 1), NULL, 2), - array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2); ----- -NULL [1, 1, 1] - -# array_remove_n with null element scalar (LargeList) -query ?? -select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), NULL, 2), - array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2); ----- -NULL [1, 1, 1] - -# array_remove_n with null element from column -query ? 
-select array_remove_n(column1, column2, column3) from (values - (make_array(1, 2, 2, 1, 1), 2, 2), - (make_array(3, 4, 4, 3, 3), null, 2), - (make_array(5, 6, 6, 5, 5), 6, 1), - (null, 1, 1) -) as t(column1, column2, column3); ----- -[1, 1, 1] -NULL -[5, 6, 5, 5] -NULL - -# array_remove_n with null element from column (LargeList) -query ? -select array_remove_n(column1, column2, column3) from (values - (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2), - (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null, 2), - (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6, 1) -) as t(column1, column2, column3); ----- -[1, 1, 1] -NULL -[5, 6, 5, 5] - -# array_remove_n scalar function #1 -query ??? -select array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), array_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), array_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -query ??? -select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int32)'), 2, 2), - array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float32)'), 1.0, 2), - array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -query ??? -select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int32)'), 2, 2), - array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float32)'), 1.0, 2), - array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -# array_remove_n scalar function #2 (element is list) -query ?? 
-select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6], 2), array_remove_n(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 2); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6], 2), - array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4], 2); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6], 2), - array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4], 2); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -# list_remove_n scalar function #3 (function alias `array_remove_n`) -query ??? -select list_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), list_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), list_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -# array_remove_n scalar function with columns #1 -query ? -select array_remove_n(column1, column2, column4) from arrays_with_repeating_elements; ----- -[1, 1, 3, 1, 3, 2, 3] -[5, 5, 6, 5, 5, 5, 4, 4] -[8, 9, 8, 7, 7] -[11, 12, 11, 12, 11, 12] - -# array_remove_n scalar function with columns #2 (element is list) -query ? 
-select array_remove_n(column1, column2, column4) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[22, 23, 24], [25, 26, 27], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] - -# array_remove_n scalar function with columns and scalars #1 -query ??? -select array_remove_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column4), array_remove_n(column1, 1, column4), array_remove_n(column1, column2, 2) from arrays_with_repeating_elements; ----- -[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] [1, 1, 3, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [11, 12, 11, 12, 10, 11, 12, 10] - -# array_remove_n scalar function with columns and scalars #2 (element is list) -query ??? 
-select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2, column4), array_remove_n(column1, make_array(1, 2, 3), column4), array_remove_n(column1, column2, 2) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_remove_all (aliases: `list_removes`) - -#TODO: https://github.com/apache/datafusion/issues/7142 -# array_remove_all with NULL elements -#query ? 
-#select array_remove_all(NULL, 1); -#---- -#NULL - -query ? -select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); ----- -NULL - -# array_remove_all with null element from column -query ? -select array_remove_all(column1, column2) from (values - (make_array(1, 2, 2, 1, 1), 2), - (make_array(3, 4, 4, 3, 3), null), - (make_array(5, 6, 6, 5, 5), 6), - (null, 1) -) as t(column1, column2); ----- -[1, 1, 1] -NULL -[5, 5, 5] -NULL - -# array_remove_all with null element from column (LargeList) -query ? -select array_remove_all(column1, column2) from (values - (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), - (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null), - (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6) -) as t(column1, column2); ----- -[1, 1, 1] -NULL -[5, 5, 5] - -# array_remove_all scalar function #1 -query ??? -select array_remove_all(make_array(1, 2, 2, 1, 1), 2), array_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -query ??? -select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), - array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), - array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -query ??? -select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -# array_remove_all scalar function #2 (element is list) -query ?? 
-select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove_all(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), - array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -# list_remove_all scalar function #3 (function alias `array_remove_all`) -query ??? -select list_remove_all(make_array(1, 2, 2, 1, 1), 2), list_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -query ?? -select list_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - list_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -# array_remove_all scalar function with columns #1 -query ? -select array_remove_all(column1, column2) from arrays_with_repeating_elements; ----- -[1, 1, 3, 1, 3, 3] -[5, 5, 6, 5, 5, 5] -[8, 9, 8] -[11, 12, 11, 12, 11, 12] - -query ? 
-select array_remove_all(column1, column2) from fixed_arrays_with_repeating_elements; ----- -[1, 1, 3, 1, 3, 3] -[5, 5, 6, 5, 5, 5] -[8, 9, 8] -[11, 12, 11, 12, 11, 12] - -# array_remove_all scalar function with columns #2 (element is list) -query ? -select array_remove_all(column1, column2) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] -[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] -[[22, 23, 24], [25, 26, 27], [22, 23, 24]] -[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] - -query ? -select array_remove_all(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] -[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] -[[22, 23, 24], [25, 26, 27], [22, 23, 24]] -[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] - -# array_remove_all scalar function with columns and scalars #1 -query ?? -select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from arrays_with_repeating_elements; ----- -[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] -[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ?? 
-select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from fixed_arrays_with_repeating_elements; ----- -[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] -[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_remove_all scalar function with columns and scalars #2 (element is list) -query ?? -select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove_all(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ?? 
-select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove_all(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## trim_array (deprecated) - -## array_length (aliases: `list_length`) - -# array_length scalar function #1 -query III -select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); ----- -5 3 3 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); ----- -5 3 3 - -# array_length scalar function #2 -query III -select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 
2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); ----- -5 3 3 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 1); ----- -5 3 3 - -# array_length scalar function #3 -query III -select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); ----- -NULL NULL 2 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 2); ----- -NULL NULL 2 - -# array_length scalar function #4 -query II -select array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 1), array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 2); ----- -3 2 - -query II -select array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 1), array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 2); ----- -3 2 - -# array_length scalar function #5 -query III -select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2) ----- -0 0 NULL - -# array_length scalar function #6 nested array -query III -select array_length([[1, 2, 3, 4], [5, 6, 7, 8]]), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 1), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 2); ----- -2 2 4 - -# list_length scalar function #7 (function alias `array_length`) -query IIII -select list_length(make_array(1, 2, 3, 4, 5)), list_length(make_array(1, 2, 3)), list_length(make_array([1, 2], [3, 4], [5, 6])), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 3); ----- -5 3 3 NULL - -query III -select 
list_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), list_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); ----- -5 3 3 - -# array_length with columns -query I -select array_length(column1, column3) from arrays_values; ----- -10 -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -query I -select array_length(arrow_cast(column1, 'LargeList(Int64)'), column3) from arrays_values; ----- -10 -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -# array_length with columns and scalars -query II -select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values; ----- -2 10 -2 10 -NULL 10 -NULL 10 -NULL NULL -NULL 10 -NULL 10 -NULL 10 - -query II -select array_length(arrow_cast(array[array[1, 2], array[3, 4]], 'LargeList(List(Int64))'), column3), array_length(arrow_cast(column1, 'LargeList(Int64)'), 1) from arrays_values; ----- -2 10 -2 10 -NULL 10 -NULL 10 -NULL NULL -NULL 10 -NULL 10 -NULL 10 - -# array_length for fixed sized list - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))')); ----- -5 3 3 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), 1); ----- -5 3 3 - - -query RRR -select array_distance([2], [3]), list_distance([1], [2]), list_distance([1], [-2]); ----- -1 1 3 - -query error -select list_distance([1], [1, 2]); - -query R -select array_distance([[1, 1]], [1, 2]); ----- -1 - -query R -select array_distance([[1, 1]], [[1, 2]]); ----- -1 - -query R -select array_distance([[1, 1]], [[1, 
2]]); ----- -1 - -query RR -select array_distance([1, 1, 0, 0], [2, 2, 1, 1]), list_distance([1, 2, 3], [1, 2, 3]); ----- -2 0 - -query RR -select array_distance([1.0, 1, 0, 0], [2, 2.0, 1, 1]), list_distance([1, 2.0, 3], [1, 2, 3]); ----- -2 0 - -query R -select list_distance([1, 1, NULL, 0], [2, 2, NULL, NULL]); ----- -NULL - -query R -select list_distance([NULL, NULL], [NULL, NULL]); ----- -NULL - -query R -select list_distance([1.0, 2.0, 3.0], [1.0, 2.0, 3.5]) AS distance; ----- -0.5 - -query R -select list_distance([1, 2, 3], [1, 2, 3]) AS distance; ----- -0 - -# array_distance with columns -query RRR -select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from arrays_distance_table; ----- -0 0.374165738677 NULL -5.196152422707 6.063827174318 NULL -10.392304845413 11.778794505381 NULL -15.58845726812 15.935494971917 NULL - -query RRR -select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from large_arrays_distance_table; ----- -0 0.374165738677 NULL -5.196152422707 6.063827174318 NULL -10.392304845413 11.778794505381 NULL -15.58845726812 15.935494971917 NULL - -query RRR -select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from fixed_size_arrays_distance_table; ----- -0 0.374165738677 NULL -5.196152422707 6.063827174318 NULL -10.392304845413 11.778794505381 NULL -15.58845726812 15.935494971917 NULL - - -## array_dims (aliases: `list_dims`) - -# array dims error -query error -select array_dims(1); - -# array_dims scalar function -query ??? -select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]])); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? 
-select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? -select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -# array_dims scalar function #2 -query ?? -select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); ----- -[1, 2, 3] [2, 5, 4] - -query ?? -select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); ----- -[1, 2, 3] [2, 5, 4] - -query ?? -select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); ----- -[1, 2, 3] [2, 5, 4] - -# array_dims scalar function #3 -query ?? -select array_dims(make_array()), array_dims(make_array(make_array())) ----- -NULL [1, 0] - -query ?? -select array_dims(arrow_cast(make_array(), 'LargeList(Int64)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -NULL [1, 0] - -# list_dims scalar function #4 (function alias `array_dims`) -query ??? -select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]])); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? 
-select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? -select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -# array_dims with columns -query ??? -select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; ----- -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [3] -NULL [3] [4] -[2, 2] NULL [1] -[2, 2] [3] NULL - -query ??? -select array_dims(column1), array_dims(column2), array_dims(column3) from large_arrays; ----- -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [3] -NULL [3] [4] -[2, 2] NULL [1] -[2, 2] [3] NULL - -query ??? 
-select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; ----- -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -NULL [3] [5] -[2, 2] NULL [5] -[2, 2] [3] NULL - - -## array_ndims (aliases: `list_ndims`) - -# array_ndims scalar function #1 - -#follow PostgreSQL -query I -select - array_ndims(null); ----- -NULL - -query I -select - array_ndims([2, 3]); ----- -1 - -statement ok -CREATE TABLE array_ndims_table -AS VALUES - ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), - ([2], [4, 5], [[8]], [[[[[10]]]]]), - (NUll, [6, 7], [[9]], [[[[[10]]]]]), - ([3], [6], [[9]], [[[[[10]]]]]) -; - -statement ok -CREATE TABLE large_array_ndims_table -AS SELECT - column1, - arrow_cast(column2, 'LargeList(Int64)') as column2, - arrow_cast(column3, 'LargeList(List(Int64))') as column3, - arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 -FROM array_ndims_table; - -statement ok -CREATE TABLE fixed_array_ndims_table -AS VALUES - (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), - (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), - (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), - (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) -; - -query IIII -select - array_ndims(column1), - array_ndims(column2), - array_ndims(column3), - array_ndims(column4) -from array_ndims_table; 
----- -1 1 2 5 -1 1 2 5 -NULL 1 2 5 -1 1 2 5 - -query IIII -select - array_ndims(column1), - array_ndims(column2), - array_ndims(column3), - array_ndims(column4) -from large_array_ndims_table; ----- -1 1 2 5 -1 1 2 5 -NULL 1 2 5 -1 1 2 5 - -query IIII -select - array_ndims(column1), - array_ndims(column2), - array_ndims(column3), - array_ndims(column4) -from fixed_array_ndims_table; ----- -1 1 2 5 -1 1 2 5 -NULL 1 2 5 -1 1 2 5 - - - -statement ok -drop table array_ndims_table; - -statement ok -drop table large_array_ndims_table - -query I -select array_ndims(arrow_cast([null], 'List(List(List(Int64)))')); ----- -3 - -# array_ndims scalar function #2 -query II -select array_ndims(array_repeat(array_repeat(array_repeat(1, 3), 2), 1)), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); ----- -3 21 - -# array_ndims scalar function #3 -query II -select array_ndims(make_array()), array_ndims(make_array(make_array())) ----- -1 2 - -query II -select array_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), array_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -1 2 - -# list_ndims scalar function #4 (function alias `array_ndims`) -query III -select list_ndims(make_array(1, 2, 3)), list_ndims(make_array([1, 2], [3, 4])), list_ndims(make_array([[[[1], [2]]]])); ----- -1 2 5 - -query III -select list_ndims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_ndims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); ----- -1 2 5 - -query II -select list_ndims(make_array()), list_ndims(make_array(make_array())) ----- -1 2 - -query II -select list_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -1 2 - -# array_ndims with columns -query III -select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; ----- -2 1 1 -2 1 1 
-2 1 1 -2 1 1 -NULL 1 1 -2 NULL 1 -2 1 NULL - -query III -select array_ndims(column1), array_ndims(column2), array_ndims(column3) from large_arrays; ----- -2 1 1 -2 1 1 -2 1 1 -2 1 1 -NULL 1 1 -2 NULL 1 -2 1 NULL - -## array_has/array_has_all/array_has_any - -# If lhs is empty, return false -query B -select array_has([], 1); ----- -false - -# If rhs is Null, we returns Null -query BBB -select array_has([], null), - array_has([1, 2, 3], null), - array_has([null, 1], null); ----- -NULL NULL NULL - -# Always return false if not contained even if list has null elements -query BB -select array_has([1, null, 2], 3), - array_has([null, null, null], 3); ----- -false false - -#TODO: array_has_all and array_has_any cannot handle NULL -#query BBBB -#select array_has_any([], null), -# array_has_any([1, 2, 3], null), -# array_has_all([], null), -# array_has_all([1, 2, 3], null); -#---- -#false false false false - -query BBBBBBBBBBBB -select array_has(make_array(1,2), 1), - array_has(make_array(1,2,NULL), 1), - array_has(make_array([2,3], [3,4]), make_array(2,3)), - array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])), - array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])), - array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])), - array_has(make_array([[[1]]]), make_array([[1]])), - array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])), - array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])), - list_has(make_array(1,2,3), 4), - array_contains(make_array(1,2,3), 3), - list_contains(make_array(1,2,3), 0) -; ----- -true true true true true false true false true false true false - -query BBBBBBBBBBBB -select array_has(arrow_cast(make_array(1,2), 'LargeList(Int64)'), 1), - array_has(arrow_cast(make_array(1,2,NULL), 'LargeList(Int64)'), 1), - array_has(arrow_cast(make_array([2,3], [3,4]), 'LargeList(List(Int64))'), make_array(2,3)), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 
'LargeList(List(List(Int64)))'), make_array([1], [2,3])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([4,5], [6])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([1])), - array_has(arrow_cast(make_array([[[1]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[2]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1], [2]])), - list_has(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 4), - array_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 3), - list_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 0) -; ----- -true true true true true false true false true false true false - -query BBBBBBBBBBBB -select array_has(arrow_cast(make_array(1,2), 'FixedSizeList(2, Int64)'), 1), - array_has(arrow_cast(make_array(1,2,NULL), 'FixedSizeList(3, Int64)'), 1), - array_has(arrow_cast(make_array([2,3], [3,4]), 'FixedSizeList(2, List(Int64))'), make_array(2,3)), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1], [2,3])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([4,5], [6])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1])), - array_has(arrow_cast(make_array([[[1]]]), 'FixedSizeList(1, List(List(List(Int64))))'), make_array([[1]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[2]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[1], [2]])), - list_has(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, 
Int64)'), 4), - array_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 3), - list_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 0) -; ----- -true true true true true false true false true false true false - -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D; ----- -true true true -false false false - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Int64)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Int64)'), arrow_cast(column4, 'LargeList(Int64)')), - array_has_any(arrow_cast(column5, 'LargeList(Int64)'), arrow_cast(column6, 'LargeList(Int64)')) -from array_has_table_1D; ----- -true true true -false false false - -query B -select array_has(column1, column2) -from array_has_table_null; ----- -true -true -false -false -false - -# array_has([1, 3, 5], 1) -> true (array contains element) -# array_has([], 1) -> false (empty array, not null) -# array_has(null, 1) -> null (null array) -query BB -select array_has(column1, column2), array_has(null, column2) -from array_has_table_empty; ----- -true NULL -false NULL -NULL NULL - -# Test for issue: array_has should return false for empty arrays, not null -# This test demonstrates the correct behavior with COALESCE to show the distinction -# array_has([1, 3, 5], 1) -> 'true' -# array_has([], 1) -> 'false' (empty array should return false) -# array_has(null, 1) -> 'null' (null array should return null) -query ?T -SELECT column1, COALESCE(CAST(array_has(column1, column2) AS VARCHAR), 'null') -from array_has_table_empty; ----- -[1, 3, 5] true -[] false -NULL null - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D; ----- -true -false - -query BB -select array_has_all(column3, column4), - array_has_any(column5, column6) -from fixed_size_array_has_table_1D; ----- -true true -false false - -query BBB -select array_has(column1, column2), - 
array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D_Float; ----- -true true false -false false true - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Float64)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Float64)'), arrow_cast(column4, 'LargeList(Float64)')), - array_has_any(arrow_cast(column5, 'LargeList(Float64)'), arrow_cast(column6, 'LargeList(Float64)')) -from array_has_table_1D_Float; ----- -true true false -false false true - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D_Float; ----- -true -false - -query BB -select array_has_all(column3, column4), - array_has_any(column5, column6) -from fixed_size_array_has_table_1D_Float; ----- -true true -false true - -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D_Boolean; ----- -false true true -true true true - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Boolean)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), arrow_cast(column4, 'LargeList(Boolean)')), - array_has_any(arrow_cast(column5, 'LargeList(Boolean)'), arrow_cast(column6, 'LargeList(Boolean)')) -from array_has_table_1D_Boolean; ----- -false true true -true true true - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D_Boolean; ----- -false -true - -query BB -select array_has_all(column3, column4), - array_has_any(column5, column6) -from fixed_size_array_has_table_1D_Boolean; ----- -true true -true true - -query BBBBBBBB -select array_has_all(column3, arrow_cast(column4,'LargeList(Boolean)')), - array_has_any(column5, arrow_cast(column6,'LargeList(Boolean)')), - array_has_all(column3, arrow_cast(column4,'List(Boolean)')), - array_has_any(column5, arrow_cast(column6,'List(Boolean)')), - array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), column4), - array_has_any(arrow_cast(column5, 
'LargeList(Boolean)'), column6), - array_has_all(arrow_cast(column3, 'List(Boolean)'), column4), - array_has_any(arrow_cast(column5, 'List(Boolean)'), column6) -from fixed_size_array_has_table_1D_Boolean; ----- -true true true true true true true true -true true true true true true true true - -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D_UTF8; ----- -true true false -false false true - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Utf8)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Utf8)'), arrow_cast(column4, 'LargeList(Utf8)')), - array_has_any(arrow_cast(column5, 'LargeList(Utf8)'), arrow_cast(column6, 'LargeList(Utf8)')) -from array_has_table_1D_UTF8; ----- -true true false -false false true - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D_UTF8; ----- -true -false - -query BB -select array_has(column1, column2), - array_has_all(column3, column4) -from array_has_table_2D; ----- -false true -true false - -query BB -select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2), - array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) -from array_has_table_2D; ----- -false true -true false - -query B -select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2) -from fixed_size_array_has_table_2D; ----- -false -false - -query B -select array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) -from fixed_size_array_has_table_2D; ----- -true -false - -query B -select array_has_all(column1, column2) -from array_has_table_2D_float; ----- -true -false - -query B -select array_has_all(arrow_cast(column1, 'LargeList(List(Float64))'), arrow_cast(column2, 'LargeList(List(Float64))')) -from array_has_table_2D_float; ----- -true -false - -query B -select array_has_all(column1, column2) -from 
fixed_size_array_has_table_2D_float; ----- -false -false - -query B -select array_has(column1, column2) from array_has_table_3D; ----- -false -true -false -false -true -false -true - -query B -select array_has(arrow_cast(column1, 'LargeList(List(List(Int64)))'), column2) from array_has_table_3D; ----- -false -true -false -false -true -false -true - -query B -select array_has(column1, column2) from fixed_size_array_has_table_3D; ----- -false -false -false -false -true -true -true - -query BBBB -select array_has(column1, make_array(5, 6)), - array_has(column1, make_array(7, NULL)), - array_has(column2, 5.5), - array_has(column3, 'o') -from arrays; ----- -false false false true -true false true false -true false false true -false true false false -NULL NULL false false -false false NULL false -false false false NULL - -query BBBB -select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(5, 6)), - array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(7, NULL)), - array_has(arrow_cast(column2, 'LargeList(Float64)'), 5.5), - array_has(arrow_cast(column3, 'LargeList(Utf8)'), 'o') -from arrays; ----- -false false false true -true false true false -true false false true -false true false false -NULL NULL false false -false false NULL false -false false false NULL - -# Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m'] -# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m'] -# Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r'] -# Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b'] -# Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x'] -# Row 6: [[11,12],[13,14]], NULL, [',','a','b','c','d'] -# Row 7: [[15,16],[NULL,18]], [16.6,17.7,18.8], NULL -query BBBB -select array_has(column1, make_array(5, 6)), - array_has(column1, make_array(7, NULL)), - array_has(column2, 5.5), - array_has(column3, 'o') -from fixed_size_arrays; ----- -false false false true -true false true false -true false false true 
-false true false false -NULL NULL false false -false false NULL false -false false false NULL - -query BBBB -select array_has_all(make_array(1,2,3), []), - array_has_any(make_array(1,2,3), []), - array_has_all(make_array('aa','bb','cc'), []), - array_has_any(make_array('aa','bb','cc'), []) -; ----- -true false true false - -query BBBBBBBBBBBBB -select array_has_all(make_array(1,2,3), make_array(1,3)), - array_has_all(make_array(1,2,3), make_array(1,4)), - array_has_all(make_array([1,2], [3,4]), make_array([1,2])), - array_has_all(make_array([1,2], [3,4]), make_array([1,3])), - array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])), - array_has_all(make_array([[1,2,3]]), make_array([[1]])), - array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])), - array_has_any(make_array(1,2,3), make_array(1,10,100)), - array_has_any(make_array(1,2,3), make_array(10,100)), - array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])), - array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])), - array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])), - array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]])) -; ----- -true false true false false false true true false false true false true - -query BBBBBBBBBBBBB -select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(1,3), 'LargeList(Int64)')), - array_has_all(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,4), 'LargeList(Int64)')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,3]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'LargeList(List(Int64))')), - 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1]]), 'LargeList(List(List(Int64)))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))')), - array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,10,100), 'LargeList(Int64)')), - array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(10,100),'LargeList(Int64)')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'LargeList(List(Int64))')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'LargeList(List(Int64))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'LargeList(List(List(Int64)))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'LargeList(List(List(Int64)))')) -; ----- -true false true false false false true true false false true false true - -query BBBBBBBBBBBBB -select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), - array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), - 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 'FixedSizeList(3, Int64)')), - array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) -; ----- -true false true false false false true true false false true false true - -# rewrite various array_has operations to InList where the haystack is a literal list -# NB that `col in (a, b, c)` is simplified to OR if there are <= 3 elements, so we make 4-element haystack lists - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); ----- 
-logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan 
-01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: 
partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); ----- -1 - -query TT -explain with test AS (SELECT 
substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has([needle], needle); ----- -100000 - -# The optimizer does not currently eliminate the filter; -# Instead, it's rewritten as `IS NULL OR NOT NULL` due to SQL null semantics -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has([needle], needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: 
-06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IS NOT NULL OR NULL, projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -# any operator -query ? -select column3 from arrays where 'L'=any(column3); ----- -[L, o, r, e, m] - -query I -select count(*) from arrays where 'L'=any(column3); ----- -1 - -query I -select count(*) from arrays where 'X'=any(column3); ----- -0 - -# any operator with comparison operators -# Use inline arrays so the test data is visible and the needle (5) -# falls within the range of some arrays but not others. -statement ok -CREATE TABLE any_op_test AS VALUES - (1, make_array(1, 2, 3)), - (2, make_array(4, 5, 6)), - (3, make_array(7, 8, 9)), - (4, make_array(3, 5, 7)); - -# 5 > ANY(arr): true when array_min < 5 -# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓ -query I? -select column1, column2 from any_op_test where 5 > any(column2) order by column1; ----- -1 [1, 2, 3] -2 [4, 5, 6] -4 [3, 5, 7] - -# 5 >= ANY(arr): true when array_min <= 5 -# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5 ✓ -query I? -select column1, column2 from any_op_test where 5 >= any(column2) order by column1; ----- -1 [1, 2, 3] -2 [4, 5, 6] -4 [3, 5, 7] - -# 5 < ANY(arr): true when array_max > 5 -# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓ -query I? 
-select column1, column2 from any_op_test where 5 < any(column2) order by column1; ----- -2 [4, 5, 6] -3 [7, 8, 9] -4 [3, 5, 7] - -# 5 <= ANY(arr): true when array_max >= 5 -# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5 ✓ -query I? -select column1, column2 from any_op_test where 5 <= any(column2) order by column1; ----- -2 [4, 5, 6] -3 [7, 8, 9] -4 [3, 5, 7] - -# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5 -# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5 ✓, row4: [3,5,7] min=3!=5 ✓ -query I? -select column1, column2 from any_op_test where 5 <> any(column2) order by column1; ----- -1 [1, 2, 3] -2 [4, 5, 6] -3 [7, 8, 9] -4 [3, 5, 7] - -# For a single-element array where the element equals the needle, <> should return false -query B -select 5 <> any(make_array(5)); ----- -false - -# For a uniform array [5,5,5], <> should also return false -query B -select 5 <> any(make_array(5, 5, 5)); ----- -false - -# Empty array: all operators should return false (no elements satisfy the condition) -query B -select 5 = any(make_array()); ----- -false - -query B -select 5 <> any(make_array()); ----- -false - -query B -select 5 > any(make_array()); ----- -false - -query B -select 5 < any(make_array()); ----- -false - -query B -select 5 >= any(make_array()); ----- -false - -query B -select 5 <= any(make_array()); ----- -false - -# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition -# These return false (NULLs are skipped by array_min/array_max) -query B -select 5 > any(make_array(6, NULL)); ----- -false - -query B -select 5 < any(make_array(3, NULL)); ----- -false - -query B -select 5 >= any(make_array(6, NULL)); ----- -false - -query B -select 5 <= any(make_array(3, NULL)); ----- -false - -# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition -query B -select 5 > any(make_array(3, NULL)); ----- -true - -query B -select 5 < any(make_array(6, NULL)); ----- 
-true - -query B -select 5 >= any(make_array(5, NULL)); ----- -true - -query B -select 5 <= any(make_array(5, NULL)); ----- -true - -query B -select 5 <> any(make_array(3, NULL)); ----- -true - -query B -select 5 <> any(make_array(5, NULL)); ----- -false - -# All-NULL array: all operators should return false -query B -select 5 > any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 < any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 >= any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 <= any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 <> any(make_array(NULL::INT, NULL::INT)); ----- -false - -# NULL left operand: should return NULL for non-empty arrays -query B -select NULL > any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL < any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL >= any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL <= any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL <> any(make_array(1, 2, 3)); ----- -NULL - -# NULL left operand with empty array: should return false -query B -select NULL > any(make_array()); ----- -false - -# NULL array: should return NULL -query B -select 5 > any(NULL::INT[]); ----- -NULL - -query B -select 5 < any(NULL::INT[]); ----- -NULL - -query B -select 5 >= any(NULL::INT[]); ----- -NULL - -query B -select 5 <= any(NULL::INT[]); ----- -NULL - -query B -select 5 <> any(NULL::INT[]); ----- -NULL - -statement ok -DROP TABLE any_op_test; - -## array_distinct - -#TODO: https://github.com/apache/datafusion/issues/7142 -#query ? -#select array_distinct(null); -#---- -#NULL - -# test with empty row, the row that does not match the condition has row count 0 -statement ok -create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); - -# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. -query ? 
rowsort -select array_distinct([sum(a)]) from t1 where a > 100 group by b; ----- -[102] -[202] - -statement ok -drop table t1; - -query ? -select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a); ----- -[1, 2, 3] -NULL -[1, 3] - -query ? -select array_distinct(arrow_cast(null, 'LargeList(Int64)')); ----- -NULL - -query ? -select array_distinct([]); ----- -[] - -query ? -select array_distinct([[], []]); ----- -[[]] - -query ? -select array_distinct(column1) -from array_distinct_table_1D; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_UTF8; ----- -[a, bc, def] -[a, bc, def, defg] -[defg] - -query ? -select array_distinct(column1) -from array_distinct_table_2D; ----- -[[1, 2], [3, 4], [5, 6]] -[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[[5, 6], NULL] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_large; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_fixed; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_UTF8_fixed; ----- -[a, bc, def] -[a, bc, def, defg] -[defg] - -query ? -select array_distinct(column1) -from array_distinct_table_2D_fixed; ----- -[[1, 2], [3, 4], [5, 6]] -[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[[5, 6], NULL] - -## arrays_zip (aliases: `list_zip`) - -# Spark example: arrays_zip(array(1, 2, 3), array(2, 3, 4)) -query ? -select arrays_zip([1, 2, 3], [2, 3, 4]); ----- -[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}] - -# Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4)) -query ? -select arrays_zip([1, 2], [2, 3], [3, 4]); ----- -[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}] - -# basic: two integer arrays of equal length -query ? -select arrays_zip([1, 2, 3], [10, 20, 30]); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# basic: two arrays with different element types (int + string) -query ? 
-select arrays_zip([1, 2, 3], ['a', 'b', 'c']); ----- -[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] - -# three arrays of equal length -query ? -select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]); ----- -[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}] - -# four arrays of equal length -query ? -select arrays_zip([1], [2], [3], [4]); ----- -[{1: 1, 2: 2, 3: 3, 4: 4}] - -# mixed element types: float + boolean -query ? -select arrays_zip([1.5, 2.5], [true, false]); ----- -[{1: 1.5, 2: true}, {1: 2.5, 2: false}] - -# different length arrays: shorter array padded with NULLs -query ? -select arrays_zip([1, 2], [3, 4, 5]); ----- -[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] - -# different length arrays: first longer -query ? -select arrays_zip([1, 2, 3], [10]); ----- -[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}] - -# different length: one single element, other three elements -query ? -select arrays_zip([1], ['a', 'b', 'c']); ----- -[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}] - -# empty arrays -query ? -select arrays_zip([], []); ----- -[] - -# one empty, one non-empty -query ? -select arrays_zip([], [1, 2, 3]); ----- -[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] - -# NULL elements inside arrays -query ? -select arrays_zip([1, NULL, 3], ['a', 'b', 'c']); ----- -[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}] - -# all NULL elements -query ? -select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]); ----- -[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}] - -# both args are NULL (entire list null) -query ? -select arrays_zip(NULL::int[], NULL::int[]); ----- -NULL - -# one arg is NULL list, other is real array -query ? -select arrays_zip(NULL::int[], [1, 2, 3]); ----- -[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] - -# real array + NULL list -query ? -select arrays_zip([1, 2], NULL::text[]); ----- -[{1: 1, 2: NULL}, {1: 2, 2: NULL}] - -# column-level test with multiple rows -query ? 
-select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), ([6], [60, 70])) as t(a, b); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}] -[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}] -[{1: 6, 2: 60}, {1: NULL, 2: 70}] - -# column-level test with NULL rows -query ? -select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}] -[{1: NULL, 2: 30}, {1: NULL, 2: 40}] -[{1: 5, 2: NULL}, {1: 6, 2: NULL}] - -# column-level test with single argument -query ? -select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ----- -[{1: 1}, {1: 2}] -NULL -[{1: 5}, {1: 6}] - -query ? -select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ----- -[{1: 10}, {1: 20}] -[{1: 30}, {1: 40}] -NULL - -# No input -query error Error during planning: 'arrays_zip' does not support zero arguments -select arrays_zip(); - -# Non-array input -query error DataFusion error: Execution error: arrays_zip expects array arguments, got Int64 -select arrays_zip(1, 2); - -# null input -query ? -select arrays_zip(null) ----- -NULL - -# single empty array -query ? -select arrays_zip([]) ----- -[] - - -# single array of null -query ? -select arrays_zip([null]) ----- -[{1: NULL}] - -query ? -select arrays_zip([NULL::int]) ----- -[{1: NULL}] - -query ? -select arrays_zip([NULL::int[]]) ----- -[{1: NULL}] - -# alias: list_zip -query ? -select list_zip([1, 2], [3, 4]); ----- -[{1: 1, 2: 3}, {1: 2, 2: 4}] - -# column test: total values equal (3 each) but per-row lengths differ -# a: [1] b: [10, 20] → row 0: a has 1, b has 2 -# a: [2, 3] b: [30] → row 1: a has 2, b has 1 -# total a values = 3, total b values = 3 (same!) but rows are misaligned -query ? -select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, b); ----- -[{1: 1, 2: 10}, {1: NULL, 2: 20}] -[{1: 2, 2: 30}, {1: 3, 2: NULL}] - -# single element arrays -query ? 
-select arrays_zip([42], ['hello']); ----- -[{1: 42, 2: hello}] - -# single argument -query ? -select arrays_zip([1, 2, 3]); ----- -[{1: 1}, {1: 2}, {1: 3}] - -# arrays_zip with LargeList inputs -query ? -select arrays_zip( - arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), - arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# arrays_zip with LargeList different lengths (padding) -query ? -select arrays_zip( - arrow_cast(make_array(1, 2), 'LargeList(Int64)'), - arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] - -# single argument from LargeList -query ? -select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')); ----- -[{1: 1}, {1: 2}, {1: 3}] - -# arrays_zip with FixedSizeList inputs -query ? -select arrays_zip( - arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), - arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# single argument from FixedSizeList -query ? -select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')); ----- -[{1: 1}, {1: 2}, {1: 3}] - -# arrays_zip mixing List and LargeList -query ? -select arrays_zip( - [1, 2, 3], - arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# arrays_zip mixing List and FixedSizeList with different lengths (padding) -query ? -select arrays_zip( - [1, 2, 3], - arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}] - -# arrays_zip with LargeList and FixedSizeList mixed types -query ? -select arrays_zip( - arrow_cast(make_array(1, 2), 'LargeList(Int64)'), - arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)') -); ----- -[{1: 1, 2: a}, {1: 2, 2: b}] - -query ??? 
-select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D; ----- -[1] [1, 3] [1, 3] -[11] [11, 33] [11, 33] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from large_array_intersect_table_1D; ----- -[1] [1, 3] [1, 3] -[11] [11, 33] [11, 33] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D_Float; ----- -[1.0] [1.0, 3.0] [] -[] [2.0] [1.11] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D_Boolean; ----- -[] [true, false] [false] -[false] [true] [true] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from large_array_intersect_table_1D_Boolean; ----- -[] [true, false] [false] -[false] [true] [true] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D_UTF8; ----- -[bc] [rust, arrow] [] -[] [datafusion, rust, arrow] [rust, arrow] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from large_array_intersect_table_1D_UTF8; ----- -[bc] [rust, arrow] [] -[] [datafusion, rust, arrow] [rust, arrow] - -query ? -select array_intersect(column1, column2) -from array_intersect_table_1D_NULL; ----- -[2, 3] -[3] -[3] -NULL -NULL -NULL - -query ?? -select array_intersect(column1, column2), - array_intersect(column3, column4) -from array_intersect_table_2D; ----- -[] [[4, 5], [6, 7]] -[[3, 4]] [[5, 6, 7], [8, 9, 10]] - -query ?? 
-select array_intersect(column1, column2), - array_intersect(column3, column4) -from large_array_intersect_table_2D; ----- -[] [[4, 5], [6, 7]] -[[3, 4]] [[5, 6, 7], [8, 9, 10]] - - -query ? -select array_intersect(column1, column2) -from array_intersect_table_2D_float; ----- -[[1.1, 2.2], [3.3]] -[[1.1, 2.2], [3.3]] - -query ? -select array_intersect(column1, column2) -from large_array_intersect_table_2D_float; ----- -[[1.1, 2.2], [3.3]] -[[1.1, 2.2], [3.3]] - -query ? -select array_intersect(column1, column2) -from array_intersect_table_3D; ----- -[] -[[[1, 2]]] - -query ? -select array_intersect(column1, column2) -from large_array_intersect_table_3D; ----- -[] -[[[1, 2]]] - -query ?????? -SELECT array_intersect(make_array(1,2,3), make_array(2,3,4)), - array_intersect(make_array(1,3,5), make_array(2,4,6)), - array_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), - array_intersect(make_array(true, false), make_array(true)), - array_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), - array_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ?????? 
-SELECT array_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), - array_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), - array_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), - array_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), - array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), - array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ?????? -SELECT array_intersect(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,3,4), 'FixedSizeList(3, Int64)')), - array_intersect(arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)')), - array_intersect(arrow_cast(make_array('aa','bb','cc'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'FixedSizeList(3, Utf8)')), - array_intersect(arrow_cast(make_array(true, false), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true), 'FixedSizeList(1, Boolean)')), - array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'FixedSizeList(3, Float64)')), - array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'FixedSizeList(3, List(Int64))')) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ? -select array_intersect([], []); ----- -[] - -query ? 
-select array_intersect(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); ----- -[] - -query ? -select array_intersect([1, 1, 2, 2, 3, 3], null); ----- -NULL - -query ? -select array_intersect(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); ----- -NULL - -query ? -select array_intersect(null, [1, 1, 2, 2, 3, 3]); ----- -NULL - -query ? -select array_intersect(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); ----- -NULL - -query ? -select array_intersect([], null); ----- -NULL - -query ? -select array_intersect([[1,2,3]], [[]]); ----- -[] - -query ? -select array_intersect([[null]], [[]]); ----- -[] - -query ? -select array_intersect(arrow_cast([], 'LargeList(Int64)'), null); ----- -NULL - -query ? -select array_intersect(null, []); ----- -NULL - -query ? -select array_intersect(null, arrow_cast([], 'LargeList(Int64)')); ----- -NULL - -query ? -select array_intersect(null, null); ----- -NULL - -query ?????? -SELECT list_intersect(make_array(1,2,3), make_array(2,3,4)), - list_intersect(make_array(1,3,5), make_array(2,4,6)), - list_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), - list_intersect(make_array(true, false), make_array(true)), - list_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), - list_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ?????? 
-SELECT list_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), - list_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), - list_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), - list_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), - list_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), - list_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query BBBB -select list_has_all(make_array(1,2,3), make_array(4,5,6)), - list_has_all(make_array(1,2,3), make_array(1,2)), - list_has_any(make_array(1,2,3), make_array(4,5,6)), - list_has_any(make_array(1,2,3), make_array(1,2,4)) -; ----- -false true false true - -query BBBB -select arrays_overlap(make_array(1,2,3), make_array(4,5,6)), - arrays_overlap(make_array(1,2,3), make_array(1,2,4)), - arrays_overlap(make_array(['aa']), make_array(['aa'],['bb'])), - arrays_overlap(make_array('aa',NULL), make_array('bb',NULL)) -; ----- -false true true true - -query ??? -select range(column2), - range(column1, column2), - range(column1, column2, column3) -from arrays_range; ----- -[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [3, 4, 5, 6, 7, 8, 9] [3, 5, 7, 9] -[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 7, 10] - -query ??????????? 
-select range(5), - range(2, 5), - range(2, 10, 3), - range(10, 2, -3), - range(1, 5, -1), - range(1, -5, 1), - range(1, -5, -1), - range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), - range(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), - range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR), - range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) -; ----- -[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] - -# Ensure can coerce from other valid types -query ??????????? 
-select range(5), - range(2, 5), - range(2, 10, 3), - range(10, 2, -3), - range(arrow_cast(1, 'Int8'), 5, -1), - range(arrow_cast(1, 'Int16'), arrow_cast(-5, 'Int8'), 1), - range(arrow_cast(1, 'Int32'), arrow_cast(-5, 'Int16'), arrow_cast(-1, 'Int8')), - range(DATE '1992-09-01', DATE '1993-03-01', arrow_cast('1 MONTH', 'Interval(YearMonth)')), - range(DATE '1993-02-01', arrow_cast(DATE '1993-01-01', 'Date64'), INTERVAL '-1' DAY), - range(arrow_cast(DATE '1989-04-01', 'Date64'), DATE '1993-03-01', INTERVAL '1' YEAR), - range(arrow_cast(DATE '1993-03-01', 'Date64'), arrow_cast(DATE '1989-04-01', 'Date64'), INTERVAL '1' YEAR) -; ----- -[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] - -# Test range with zero step -query error DataFusion error: Execution error: step can't be 0 for function range\(start \[, stop, step\]\) -select range(1, 1, 0); - -# Test range with big steps -query ???? -select - range(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, - range(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, - range(0, -9223372036854775808, -9223372036854775808) as c3, - range(0, 9223372036854775807, 9223372036854775807) as c4; ----- -[] [] [0] [0] - -# Test range for other edge cases -query ???????? 
-select - range(9223372036854775807, 9223372036854775807, -1) as c1, - range(9223372036854775807, 9223372036854775806, -1) as c2, - range(9223372036854775807, 9223372036854775807, 1) as c3, - range(9223372036854775806, 9223372036854775807, 1) as c4, - range(-9223372036854775808, -9223372036854775808, -1) as c5, - range(-9223372036854775807, -9223372036854775808, -1) as c6, - range(-9223372036854775808, -9223372036854775808, 1) as c7, - range(-9223372036854775808, -9223372036854775807, 1) as c8; ----- -[] [9223372036854775807] [] [9223372036854775806] [] [-9223372036854775807] [] [-9223372036854775808] - -# Test range(start, stop, step) with NULL values -query ? -select range(start, stop, step) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop), - (values (3), (NULL)) as step_values(step) -where start is null or stop is null or step is null ----- -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -# Test range(start, stop) with NULL values -query ? -select range(start, stop) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop) -where start is null or stop is null ----- -NULL -NULL -NULL - -# Test range(stop) with NULL value -query ? -select range(NULL) ----- -NULL - -## should return NULL -query ? -select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -## should return NULL -query ? -select range(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -query ? -select range(DATE '1992-09-01', DATE '1993-03-01', NULL); ----- -NULL - -query ? -select range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', NULL); ----- -NULL - -query ? -select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); ----- -NULL - -query ? 
-select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); ----- -NULL - -query ? -select range(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select range(NULL, NULL, NULL); ----- -NULL - -query ? -select range(NULL::timestamp, NULL::timestamp, NULL); ----- -NULL - -query ? -select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select range(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query ? -select range(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. -select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR) - -query ? -select range(TIMESTAMP '1993-03-01', TIMESTAMP '1993-03-01', INTERVAL '1' HOUR) ----- -[] - -query ????????? -select generate_series(5), - generate_series(2, 5), - generate_series(2, 10, 3), - generate_series(1, 5, 1), - generate_series(5, 1, -1), - generate_series(10, 2, -3), - generate_series(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), - generate_series(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), - generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR) -; ----- -[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] 
- -query ? -select generate_series('2021-01-01'::timestamp, '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] - -# Other timestamp types are coerced to nanosecond -query ? -select generate_series(arrow_cast('2021-01-01'::timestamp, 'Timestamp(s)'), '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] - -query ? -select generate_series('2021-01-01'::timestamp, arrow_cast('2021-01-01T15:00:00'::timestamp, 'Timestamp(µs)'), INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] - -query ? 
-select generate_series('2021-01-01T00:00:00EST'::timestamp, '2021-01-01T15:00:00-12:00'::timestamp, INTERVAL '1' HOUR); ----- -[2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00, 2021-01-01T16:00:00, 2021-01-01T17:00:00, 2021-01-01T18:00:00, 2021-01-01T19:00:00, 2021-01-01T20:00:00, 2021-01-01T21:00:00, 2021-01-01T22:00:00, 2021-01-01T23:00:00, 2021-01-02T00:00:00, 2021-01-02T01:00:00, 2021-01-02T02:00:00, 2021-01-02T03:00:00] - -query ? -select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T15:00:00', 'Timestamp(Nanosecond, Some("+05:00"))'), INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00-05:00, 2021-01-01T01:00:00-05:00, 2021-01-01T02:00:00-05:00, 2021-01-01T03:00:00-05:00, 2021-01-01T04:00:00-05:00, 2021-01-01T05:00:00-05:00] - -## -5500000000 ns is -5.5 sec -query ? 
-select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T06:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), INTERVAL '1 HOUR 30 MINUTE -5500000000 NANOSECOND'); ----- -[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00] - -## mixing types for timestamps is not supported -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR); - -## mixing types not allowed even if an argument is null -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL); - -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series(1, '2024-01-01', '2025-01-02'); - -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series('2024-01-01'::timestamp, '2025-01-02', interval '1 day'); - -## should return NULL -query ? -select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -## should return NULL -query ? -select generate_series(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL); ----- -NULL - -query ? -select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); ----- -NULL - -query ? 
-select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); ----- -NULL - -query ? -select generate_series(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(NULL, NULL, NULL); ----- -NULL - -query ? -select generate_series(NULL::timestamp, NULL::timestamp, NULL); ----- -NULL - -query ? -select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select generate_series(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query ? -select generate_series(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query error DataFusion error: Execution error: Cannot generate date range less than 1 day. -select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR) - -query error DataFusion error: Execution error: Cannot generate date range less than 1 day. -select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR) - -query ? -select generate_series(TIMESTAMP '2000-01-01', TIMESTAMP '2000-01-02', INTERVAL '1' HOUR) ----- -[2000-01-01T00:00:00, 2000-01-01T01:00:00, 2000-01-01T02:00:00, 2000-01-01T03:00:00, 2000-01-01T04:00:00, 2000-01-01T05:00:00, 2000-01-01T06:00:00, 2000-01-01T07:00:00, 2000-01-01T08:00:00, 2000-01-01T09:00:00, 2000-01-01T10:00:00, 2000-01-01T11:00:00, 2000-01-01T12:00:00, 2000-01-01T13:00:00, 2000-01-01T14:00:00, 2000-01-01T15:00:00, 2000-01-01T16:00:00, 2000-01-01T17:00:00, 2000-01-01T18:00:00, 2000-01-01T19:00:00, 2000-01-01T20:00:00, 2000-01-01T21:00:00, 2000-01-01T22:00:00, 2000-01-01T23:00:00, 2000-01-02T00:00:00] - -query ? 
-select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '-1' HOUR) ----- -[2000-01-02T00:00:00, 2000-01-01T23:00:00, 2000-01-01T22:00:00, 2000-01-01T21:00:00, 2000-01-01T20:00:00, 2000-01-01T19:00:00, 2000-01-01T18:00:00, 2000-01-01T17:00:00, 2000-01-01T16:00:00, 2000-01-01T15:00:00, 2000-01-01T14:00:00, 2000-01-01T13:00:00, 2000-01-01T12:00:00, 2000-01-01T11:00:00, 2000-01-01T10:00:00, 2000-01-01T09:00:00, 2000-01-01T08:00:00, 2000-01-01T07:00:00, 2000-01-01T06:00:00, 2000-01-01T05:00:00, 2000-01-01T04:00:00, 2000-01-01T03:00:00, 2000-01-01T02:00:00, 2000-01-01T01:00:00, 2000-01-01T00:00:00] - -# Test generate_series with small intervals -query ? -select generate_series('2000-01-01T00:00:00.000000001Z'::timestamp, '2000-01-01T00:00:00.00000001Z'::timestamp, INTERVAL '1' NANOSECONDS) ----- -[2000-01-01T00:00:00.000000001, 2000-01-01T00:00:00.000000002, 2000-01-01T00:00:00.000000003, 2000-01-01T00:00:00.000000004, 2000-01-01T00:00:00.000000005, 2000-01-01T00:00:00.000000006, 2000-01-01T00:00:00.000000007, 2000-01-01T00:00:00.000000008, 2000-01-01T00:00:00.000000009, 2000-01-01T00:00:00.000000010] - -# Test generate_series with zero step -query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\) -select generate_series(1, 1, 0); - -# Test generate_series with zero step -query error DataFusion error: Execution error: Interval argument to generate_series must not be 0 -select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); - -# Test generate_series with big steps -query ???? 
-select - generate_series(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, - generate_series(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, - generate_series(0, -9223372036854775808, -9223372036854775808) as c3, - generate_series(0, 9223372036854775807, 9223372036854775807) as c4; ----- -[-9223372036854775808] [9223372036854775807] [0, -9223372036854775808] [0, 9223372036854775807] - - -# Test generate_series for other edge cases -query ???? -select - generate_series(9223372036854775807, 9223372036854775807, -1) as c1, - generate_series(9223372036854775807, 9223372036854775807, 1) as c2, - generate_series(-9223372036854775808, -9223372036854775808, -1) as c3, - generate_series(-9223372036854775808, -9223372036854775808, 1) as c4; ----- -[9223372036854775807] [9223372036854775807] [-9223372036854775808] [-9223372036854775808] - -# Test generate_series(start, stop, step) with NULL values -query ? -select generate_series(start, stop, step) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop), - (values (3), (NULL)) as step_values(step) -where start is null or stop is null or step is null ----- -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -# Test generate_series(start, stop) with NULL values -query ? -select generate_series(start, stop) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop) -where start is null or stop is null ----- -NULL -NULL -NULL - -# Test generate_series(stop) with NULL value -query ? -select generate_series(NULL) ----- -NULL - -# Test generate_series with a table of date values -statement ok -CREATE TABLE date_table( - start DATE, - stop DATE, - step INTERVAL -) AS VALUES - (DATE '1992-01-01', DATE '1993-01-02', INTERVAL '1' MONTH), - (DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), - (DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR); - -query ? 
-select generate_series(start, stop, step) from date_table; ----- -[1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-05-01, 1992-06-01, 1992-07-01, 1992-08-01, 1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01] -[1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] - -query ? -select generate_series(start, stop, INTERVAL '1 year') from date_table; ----- -[1992-01-01, 1993-01-01] -[] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] - -query ? -select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_table; ----- -[1992-01-01, 1993-01-01] -[1993-02-01] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] - -# Test generate_series with a table of timestamp values -statement ok -CREATE TABLE timestamp_table( - start TIMESTAMP, - stop TIMESTAMP, - step INTERVAL -) AS VALUES - (TIMESTAMP '1992-01-01T00:00:00', TIMESTAMP '1993-01-02T00:00:00', INTERVAL '1' MONTH), - (TIMESTAMP '1993-02-01T00:00:00', TIMESTAMP '1993-01-01T00:00:00', INTERVAL '-1' DAY), - (TIMESTAMP '1989-04-01T00:00:00', TIMESTAMP '1993-03-01T00:00:00', INTERVAL '1' YEAR); - -query ? 
-select generate_series(start, stop, step) from timestamp_table; ----- -[1992-01-01T00:00:00, 1992-02-01T00:00:00, 1992-03-01T00:00:00, 1992-04-01T00:00:00, 1992-05-01T00:00:00, 1992-06-01T00:00:00, 1992-07-01T00:00:00, 1992-08-01T00:00:00, 1992-09-01T00:00:00, 1992-10-01T00:00:00, 1992-11-01T00:00:00, 1992-12-01T00:00:00, 1993-01-01T00:00:00] -[1993-02-01T00:00:00, 1993-01-31T00:00:00, 1993-01-30T00:00:00, 1993-01-29T00:00:00, 1993-01-28T00:00:00, 1993-01-27T00:00:00, 1993-01-26T00:00:00, 1993-01-25T00:00:00, 1993-01-24T00:00:00, 1993-01-23T00:00:00, 1993-01-22T00:00:00, 1993-01-21T00:00:00, 1993-01-20T00:00:00, 1993-01-19T00:00:00, 1993-01-18T00:00:00, 1993-01-17T00:00:00, 1993-01-16T00:00:00, 1993-01-15T00:00:00, 1993-01-14T00:00:00, 1993-01-13T00:00:00, 1993-01-12T00:00:00, 1993-01-11T00:00:00, 1993-01-10T00:00:00, 1993-01-09T00:00:00, 1993-01-08T00:00:00, 1993-01-07T00:00:00, 1993-01-06T00:00:00, 1993-01-05T00:00:00, 1993-01-04T00:00:00, 1993-01-03T00:00:00, 1993-01-02T00:00:00, 1993-01-01T00:00:00] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -query ? -select generate_series(start, stop, INTERVAL '1 year') from timestamp_table; ----- -[1992-01-01T00:00:00, 1993-01-01T00:00:00] -[] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -query ? -select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; ----- -[1992-01-01T00:00:00, 1993-01-01T00:00:00] -[1993-02-01T00:00:00] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -# https://github.com/apache/datafusion/issues/11922 -query ? 
-select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; ----- -[1992-01-01T00:00:00, 1993-01-01T00:00:00] -[1993-02-01T00:00:00] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -## array_except - -statement ok -CREATE TABLE array_except_table -AS VALUES - ([1, 2, 2, 3], [2, 3, 4]), - ([2, 3, 3], [3]), - ([3], [3, 3, 4]), - (null, [3, 4]), - ([1, 2], null), - (null, null) -; - -query ? -select array_except(column1, column2) from array_except_table; ----- -[1] -[2] -[] -NULL -NULL -NULL - -statement ok -drop table array_except_table; - -statement ok -CREATE TABLE array_except_nested_list_table -AS VALUES - ([[1, 2], [3]], [[2], [3], [4, 5]]), - ([[1, 2], [3]], [[2], [1, 2]]), - ([[1, 2], [3]], null), - (null, [[1], [2, 3], [4, 5, 6]]), - ([[1], [2, 3], [4, 5, 6]], [[2, 3], [4, 5, 6], [1]]) -; - -query ? -select array_except(column1, column2) from array_except_nested_list_table; ----- -[[1, 2]] -[[3]] -NULL -NULL -[] - -statement ok -drop table array_except_nested_list_table; - -statement ok -CREATE TABLE array_except_table_float -AS VALUES - ([1.1, 2.2, 3.3], [2.2]), - ([1.1, 2.2, 3.3], [4.4]), - ([1.1, 2.2, 3.3], [3.3, 2.2, 1.1]) -; - -query ? -select array_except(column1, column2) from array_except_table_float; ----- -[1.1, 3.3] -[1.1, 2.2, 3.3] -[] - -statement ok -drop table array_except_table_float; - -statement ok -CREATE TABLE array_except_table_ut8 -AS VALUES - (['a', 'b', 'c'], ['a']), - (['a', 'bc', 'def'], ['g', 'def']), - (['a', 'bc', 'def'], null), - (null, ['a']) -; - -query ? -select array_except(column1, column2) from array_except_table_ut8; ----- -[b, c] -[a, bc] -NULL -NULL - -statement ok -drop table array_except_table_ut8; - -statement ok -CREATE TABLE array_except_table_bool -AS VALUES - ([true, false, false], [false]), - ([true, true, true], [false]), - ([false, false, false], [true]), - ([true, false], null), - (null, [true, false]) -; - -query ? 
-select array_except(column1, column2) from array_except_table_bool; ----- -[true] -[true] -[false] -NULL -NULL - -statement ok -drop table array_except_table_bool; - -query ? -select array_except([], null); ----- -NULL - -query ? -select array_except([], []); ----- -[] - -query ? -select array_except(null, []); ----- -NULL - -query ? -select array_except(null, null) ----- -NULL - -query ? -select array_except(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); ----- -[1, 2] - -query ? -select array_except(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); ----- -[1, 2] - -### Array operators tests - - -## array concatenate operator - -# array concatenate operator with scalars #1 (like array_concat scalar function) -query ?? -select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array concatenate operator with scalars #2 (like array_append scalar function) -query ??? -select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array concatenate operator with scalars #3 (like array_prepend scalar function) -query ??? -select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array concatenate operator with scalars #4 (mixed) -query ? -select 0 || [1,2,3] || 4 || [5] || [6,7]; ----- -[0, 1, 2, 3, 4, 5, 6, 7] - -# array concatenate operator with nd-list #5 (mixed) -query ? 
-select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10]; ----- -[[0, 1, 2, 3], [4, 5], [6, 7, 8], [9, 10]] - -# array concatenate operator non-valid cases -## concat 2D with scalar is not valid -query error -select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10] || 11; - -## concat scalar with 2D is not valid -query error -select 0 || [[1,2,3]]; - -# array concatenate operator with column - -statement ok -CREATE TABLE array_concat_operator_table -AS VALUES - (0, [1, 2, 2, 3], 4, [5, 6, 5]), - (-1, [4, 5, 6], 7, [8, 1, 1]) -; - -query ? -select column1 || column2 || column3 || column4 from array_concat_operator_table; ----- -[0, 1, 2, 2, 3, 4, 5, 6, 5] -[-1, 4, 5, 6, 7, 8, 1, 1] - -statement ok -drop table array_concat_operator_table; - -## array containment operator - -# array containment operator with scalars #1 (at arrow) -query BBBBBBB -select make_array(1,2,3) @> make_array(1,3), - make_array(1,2,3) @> make_array(1,4), - make_array([1,2], [3,4]) @> make_array([1,2]), - make_array([1,2], [3,4]) @> make_array([1,3]), - make_array([1,2], [3,4]) @> make_array([1,2], [3,4], [5,6]), - make_array([[1,2,3]]) @> make_array([[1]]), - make_array([[1,2,3]]) @> make_array([[1,2,3]]); ----- -true false true false false false true - -# Make sure it is rewritten to function array_has_all() -query TT -explain select [1,2,3] @> [1,3]; ----- -logical_plan -01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) -02)--EmptyRelation: rows=1 -physical_plan -01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] -02)--PlaceholderRowExec - -# array containment operator with scalars #2 (arrow at) -query BBBBBBB -select make_array(1,3) <@ make_array(1,2,3), - make_array(1,4) <@ make_array(1,2,3), - make_array([1,2]) <@ make_array([1,2], [3,4]), - make_array([1,3]) <@ make_array([1,2], [3,4]), - make_array([1,2], [3,4], [5,6]) <@ make_array([1,2], [3,4]), - 
make_array([[1]]) <@ make_array([[1,2,3]]), - make_array([[1,2,3]]) <@ make_array([[1,2,3]]); ----- -true false true false false false true - -# Make sure it is rewritten to function array_has_all() -query TT -explain select [1,3] <@ [1,2,3]; ----- -logical_plan -01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) -02)--EmptyRelation: rows=1 -physical_plan -01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] -02)--PlaceholderRowExec - -### Array casting tests - - -## make_array - -# make_array scalar function #1 -query ? -select make_array(1, 2.0) ----- -[1.0, 2.0] - -# make_array scalar function #2 -query ? -select make_array(null, 1.0) ----- -[NULL, 1.0] - -# make_array scalar function #3 -query ? -select make_array(1, 2.0, null, 3) ----- -[1.0, 2.0, NULL, 3.0] - -# make_array scalar function #4 -query ? -select make_array(1.0, '2', null) ----- -[1.0, 2.0, NULL] - -### FixedSizeListArray - -statement ok -CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION '../core/tests/data/fixed_size_list_array.parquet'; - -query T -select arrow_typeof(f0) from fixed_size_list_array; ----- -FixedSizeList(2 x Int64) -FixedSizeList(2 x Int64) - -query ? -select * from fixed_size_list_array; ----- -[1, 2] -[3, 4] - -query ? -select f0 from fixed_size_list_array; ----- -[1, 2] -[3, 4] - -query ? -select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array; ----- -[1, 2] -[3, 4] - -query ? -select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array ----- -[[1, 2]] -[[3, 4]] - -query T -select arrow_typeof(make_array(arrow_cast(f0, 'List(Int64)'))) from fixed_size_list_array ----- -List(List(Int64)) -List(List(Int64)) - -query ? 
-select make_array(f0) from fixed_size_list_array ----- -[[1, 2]] -[[3, 4]] - -query T -select arrow_typeof(make_array(f0)) from fixed_size_list_array ----- -List(FixedSizeList(2 x Int64)) -List(FixedSizeList(2 x Int64)) - -query ? -select array_concat(column1, [7]) from arrays_values_v2; ----- -[NULL, 2, 3, 7] -[7] -[9, NULL, 10, 7] -[NULL, 1, 7] -[11, 12, 7] -[7] - -# flatten - -query ? -select flatten(NULL); ----- -NULL - -# flatten with scalar values #1 -query ??? -select flatten(make_array(1, 2, 1, 3, 2)), - flatten(make_array([1], [2, 3], [null], make_array(4, null, 5))), - flatten(make_array([[1.1]], [[2.2]], [[3.3], [4.4]])); ----- -[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] - -query ??? -select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), - flatten(arrow_cast(make_array([1], null, [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), - flatten(arrow_cast(make_array([[1.1]], [[2.2]], [[3.3], [4.4]]), 'LargeList(LargeList(LargeList(Float64)))')); ----- -[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] - -query ??? 
-select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), - flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), - flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); ----- -[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] - -query ??TT -select flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))')), - flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))')), - arrow_typeof(flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))'))), - arrow_typeof(flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))'))); ----- -[1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] LargeList(Int64) LargeList(FixedSizeList(1 x Float64)) - -# flatten with column values -query ???? -select flatten(column1), - flatten(column2), - flatten(column3), - flatten(column4) -from flatten_table; ----- -[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] -[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - -query ???? -select flatten(column1), - flatten(column2), - flatten(column3), - flatten(column4) -from large_flatten_table; ----- -[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] -[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - -query ???? -select flatten(column1), - flatten(column2), - flatten(column3), - flatten(column4) -from fixed_size_flatten_table; ----- -[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] -[1, 2, 3, 4, 5, 6] [[8], [9, 10], [11, 12, 13]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - -# flatten with different inner list type -query ?????? 
-select flatten(arrow_cast(make_array([1, 2], [3, 4]), 'List(FixedSizeList(2, Int64))')), - flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'List(FixedSizeList(1, List(Int64)))')), - flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), - flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(List(List(Int64)))')), - flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(FixedSizeList(2, Int64))')), - flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(FixedSizeList(1, List(Int64)))')) ----- -[1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] - -## empty (aliases: `array_empty`, `list_empty`) -# empty scalar function #1 -query B -select empty(make_array(1)); ----- -false - -query B -select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -false - -query B -select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -false - -# empty scalar function #2 -query B -select empty(make_array()); ----- -true - -query B -select empty(arrow_cast(make_array(), 'LargeList(Int64)')); ----- -true - -#TODO: https://github.com/apache/datafusion/issues/9158 -#query B -#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); -#---- -#true - -# empty scalar function #3 -query B -select empty(make_array(NULL)); ----- -false - -query B -select empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); ----- -false - -query B -select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')); ----- -false - -#TODO: https://github.com/apache/datafusion/issues/7142 -# empty scalar function #4 -#query B -#select empty(NULL); -#---- -#NULL - -# empty scalar function #5 -query B -select empty(column1) from arrays; ----- -false -false -false -false -NULL -false -false - -query B -select empty(arrow_cast(column1, 'LargeList(List(Int64))')) from arrays; ----- -false -false -false -false -NULL -false -false - -query B -select empty(column1) from fixed_size_arrays; ----- -false 
-false -false -false -NULL -false -false - -## array_empty (aliases: `empty`, `list_empty`) -# array_empty scalar function #1 -query B -select array_empty(make_array(1)); ----- -false - -query B -select array_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -false - -# array_empty scalar function #2 -query B -select array_empty(make_array()); ----- -true - -query B -select array_empty(arrow_cast(make_array(), 'LargeList(Int64)')); ----- -true - -# array_empty scalar function #3 -query B -select array_empty(make_array(NULL)); ----- -false - -query B -select array_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); ----- -false - -## list_empty (aliases: `empty`, `array_empty`) -# list_empty scalar function #1 -query B -select list_empty(make_array(1)); ----- -false - -query B -select list_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -false - -# list_empty scalar function #2 -query B -select list_empty(make_array()); ----- -true - -query B -select list_empty(arrow_cast(make_array(), 'LargeList(Int64)')); ----- -true - -# list_empty scalar function #3 -query B -select list_empty(make_array(NULL)); ----- -false - -query B -select list_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); ----- -false - -# string_to_array scalar function -query ? -SELECT string_to_array('abcxxxdef', 'xxx') ----- -[abc, def] - -query I -SELECT cardinality(string_to_array('', ',')) ----- -0 - -query I -SELECT cardinality(string_to_array('', '')) ----- -0 - -query I -SELECT cardinality(string_to_array('', ',', 'x')) ----- -0 - -query I -SELECT cardinality(string_to_array('', '', 'x')) ----- -0 - -query ? -SELECT string_to_array('abc', '') ----- -[abc] - -query ? -SELECT string_to_array('abc', NULL) ----- -[a, b, c] - -query ? -SELECT string_to_array('abc def', ' ', 'def') ----- -[abc, NULL] - -query ? 
-select string_to_array(e, ',') from values; ----- -[Lorem] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -# karge string tests for string_to_array - -# string_to_array scalar function -query ? -SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), 'xxx') ----- -[abc, def] - -# string_to_array scalar function -query ? -SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), arrow_cast('xxx', 'LargeUtf8')) ----- -[abc, def] - -query ? -SELECT string_to_array(arrow_cast('abc', 'LargeUtf8'), NULL) ----- -[a, b, c] - -query ? -select string_to_array(arrow_cast(e, 'LargeUtf8'), ',') from values; ----- -[Lorem] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -query ? -select string_to_array(arrow_cast(e, 'LargeUtf8'), ',', arrow_cast('Lorem', 'LargeUtf8')) from values; ----- -[NULL] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -# string view tests for string_to_array - -# string_to_array scalar function -query ? -SELECT string_to_array(arrow_cast('abcxxxdef', 'Utf8View'), 'xxx') ----- -[abc, def] - -query ? -SELECT string_to_array(arrow_cast('abc', 'Utf8View'), NULL) ----- -[a, b, c] - -query ? -select string_to_array(arrow_cast(e, 'Utf8View'), ',') from values; ----- -[Lorem] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -# test string_to_array aliases - -query ? -select string_to_list(e, 'm') from values; ----- -[Lore, ] -[ipsu, ] -[dolor] -[sit] -[a, et] -[,] -[consectetur] -[adipiscing] -NULL - -# string_to_array: single-char delimiter producing multiple elements -query ? -SELECT string_to_array('a,b,c', ',') ----- -[a, b, c] - -# string_to_array: delimiter not found in input -query ? -SELECT string_to_array('abc', ',') ----- -[abc] - -# string_to_array: empty string input -query ? -SELECT string_to_array('', ',') ----- -[] - -# string_to_array: null_str matching multiple elements -query ? 
-SELECT string_to_array('a,NULL,b,NULL,c', ',', 'NULL') ----- -[a, NULL, b, NULL, c] - -# string_to_array: null_str matching all elements -query ? -SELECT string_to_array('x,x,x', ',', 'x') ----- -[NULL, NULL, NULL] - -# string_to_array: null_str with empty-string delimiter -query ? -SELECT string_to_array('abc', '', 'abc') ----- -[NULL] - -# string_to_array: NULL string input -query ? -SELECT string_to_array(NULL, ',') ----- -NULL - -# string_to_array: columnar delimiter -query ?? -SELECT string_to_array('a,b,c', col1), string_to_array('a::b::c', col2) - FROM (VALUES (',', '::')) AS t(col1, col2) ----- -[a, b, c] [a, b, c] - -# string_to_array: columnar null_str -query ? -SELECT string_to_array('a,NULL,b', ',', col1) - FROM (VALUES ('NULL')) AS t(col1) ----- -[a, NULL, b] - -# string_to_array: adjacent delimiters produce empty strings -query ? -SELECT string_to_array('a,,b', ',') ----- -[a, , b] - -# string_to_array: delimiter at start and end -query ? -SELECT string_to_array(',a,b,', ',') ----- -[, a, b, ] - -# array_resize scalar function #1 -query ? -select array_resize(make_array(1, 2, 3), 1); ----- -[1] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1); ----- -[1] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1); ----- -[1] - -# array_resize scalar function #2 -query ? -select array_resize(make_array(1, 2, 3), 5); ----- -[1, 2, 3, NULL, NULL] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5); ----- -[1, 2, 3, NULL, NULL] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 5); ----- -[1, 2, 3, NULL, NULL] - -# array_resize scalar function #3 -query ? -select array_resize(make_array(1, 2, 3), 5, 4); ----- -[1, 2, 3, 4, 4] - -query ? 
-select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5, 4); ----- -[1, 2, 3, 4, 4] - -# array_resize scalar function #4 -query error -select array_resize(make_array(1, 2, 3), -5, 2); - -# array_resize scalar function #5 -query ? -select array_resize(make_array(1.1, 2.2, 3.3), 10, 9.9); ----- -[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] - -query ? -select array_resize(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), 10, 9.9); ----- -[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] - -# array_resize scalar function #5 -query ? -select array_resize(column1, column2, column3) from arrays_values; ----- -[NULL] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -NULL -[] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] - -query ? 
-select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from arrays_values; ----- -[NULL] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -NULL -[] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] - -# array_resize scalar function #5 -query ? -select array_resize([[1], [2], [3]], 10, [5]); ----- -[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] - -query ? -select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [5]); ----- -[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] - -# array_resize null value -query ? -select array_resize(arrow_cast(NULL, 'List(Int8)'), 1); ----- -NULL - -statement ok -CREATE TABLE array_resize_values -AS VALUES - (make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1), - (make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2), - (make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3), - (make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4), - (NULL, 3, 0), - (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), - (make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL), - (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7) -; - -# array_resize columnar test #1 -query ? 
-select array_resize(column1, column2, column3) from array_resize_values; ----- -[1, NULL] -[11, 12, NULL, 14, 15] -[21, 22, 23, 24, NULL, 26, 27, 28] -[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] -NULL -[] -[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] - -# array_resize columnar test #2 -query ? -select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; ----- -[1, NULL] -[11, 12, NULL, 14, 15] -[21, 22, 23, 24, NULL, 26, 27, 28] -[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] -NULL -[] -[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] - -## array_reverse -query ?? -select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); ----- -[3, 2, 1] [1] - -query ?? -select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -[3, 2, 1] [1] - -query ???? -select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), - array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')), - array_reverse(arrow_cast(make_array(1, NULL, 3), 'FixedSizeList(3, Int64)')), - array_reverse(arrow_cast(make_array(NULL, NULL, NULL), 'FixedSizeList(3, Int64)')); ----- -[3, 2, 1] [1] [3, NULL, 1] [NULL, NULL, NULL] - -query ?? -select array_reverse(NULL), array_reverse([]); ----- -NULL [] - -query ?? 
-select array_reverse(column1), column1 from arrays_values; ----- -[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[20, NULL, 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[30, 29, 28, 27, 26, 25, NULL, 23, 22, 21] [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[40, 39, 38, 37, NULL, 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -NULL NULL -[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[60, 59, 58, 57, 56, 55, 54, NULL, 52, 51] [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -statement ok -CREATE TABLE test_reverse_fixed_size AS VALUES - (arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)')), - (arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)')), - (arrow_cast([NULL, 8, 9], 'FixedSizeList(3, Int64)')), - (NULL); - -query ? -SELECT array_reverse(column1) FROM test_reverse_fixed_size; ----- -[3, 2, 1] -[6, 5, 4] -[9, 8, NULL] -NULL - -statement ok -DROP TABLE test_reverse_fixed_size; - -# Test defining a table with array columns -statement ok -create table test_create_array_table( - a int[], - b text[], - -- two-dimensional array - c int[][], - d int -); - -query I -insert into test_create_array_table values - ([1, 2, 3], ['a', 'b', 'c'], [[4,6], [6,7,8]], 1); ----- -1 - -query ???I -select * from test_create_array_table; ----- -[1, 2, 3] [a, b, c] [[4, 6], [6, 7, 8]] 1 - -query T -select arrow_typeof(a) from test_create_array_table; ----- -List(Int32) - -query T -select arrow_typeof(c) from test_create_array_table; ----- -List(List(Int32)) - -# Test casting to array types -# issue: https://github.com/apache/datafusion/issues/9440 -query ??T -select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]); ----- -[1, 2, 3] [[1]] List(Utf8View) - -# test empty arrays return length -# issue: https://github.com/apache/datafusion/pull/12459 -statement ok -create table values_all_empty (a 
int[]) as values ([]), ([]); - -query B -select array_has(a, 1) from values_all_empty; ----- -false -false - -# Test create table with fixed sized array -statement ok -create table fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5,6]); - -query T -select arrow_typeof(a) from fixed_size_col_table; ----- -FixedSizeList(3 x Int32) -FixedSizeList(3 x Int32) - -query ? rowsort -SELECT DISTINCT a FROM fixed_size_col_table ----- -[1, 2, 3] -[4, 5, 6] - -query ?I rowsort -SELECT a, count(*) FROM fixed_size_col_table GROUP BY a ----- -[1, 2, 3] 1 -[4, 5, 6] 1 - -statement error Cast error: Cannot cast to FixedSizeList\(3\): value at index 0 has length 2 -create table varying_fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5]); - -# https://github.com/apache/datafusion/issues/16187 -# should be NULL in case of out of bounds for Null Type -query ? -select [named_struct('a', 1, 'b', null)][-2]; ----- -NULL - -statement ok -COPY (select [[true, false], [false, true]] a, [false, true] b union select [[null, null]], null) to 'test_files/scratch/array/array_has/single_file.parquet' stored as parquet; - -statement ok -CREATE EXTERNAL TABLE array_has STORED AS PARQUET location 'test_files/scratch/array/array_has/single_file.parquet'; - -query B -select array_contains(a, b) from array_has order by 1 nulls last; ----- -true -NULL - -# Expected output (once supported): -# ---- -# [5, 4, 3, 2, 1] -query error -select array_reverse(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)')); - -### Delete tables - -statement ok -drop table values; - -statement ok -drop table values_without_nulls; - -statement ok -drop table nested_arrays; - -statement ok -drop table large_nested_arrays; - -statement ok -drop table fixed_size_nested_arrays; - -statement ok -drop table arrays; - -statement ok -drop table large_arrays; - -statement ok -drop table fixed_size_arrays; - -statement ok -drop table slices; - -statement ok -drop table fixed_slices; - -statement ok -drop table 
arrayspop; - -statement ok -drop table large_arrayspop; - -statement ok -drop table arrays_values; - -statement ok -drop table arrays_values_v2; - -statement ok -drop table large_arrays_values_v2; - -statement ok -drop table array_has_table_1D; - -statement ok -drop table array_has_table_1D_Float; - -statement ok -drop table array_has_table_1D_Boolean; - -statement ok -drop table array_has_table_1D_UTF8; - -statement ok -drop table array_has_table_2D; - -statement ok -drop table array_has_table_2D_float; - -statement ok -drop table array_has_table_3D; - -statement ok -drop table array_intersect_table_1D; - -statement ok -drop table large_array_intersect_table_1D; - -statement ok -drop table array_intersect_table_1D_Float; - -statement ok -drop table large_array_intersect_table_1D_Float; - -statement ok -drop table array_intersect_table_1D_Boolean; - -statement ok -drop table large_array_intersect_table_1D_Boolean; - -statement ok -drop table array_intersect_table_1D_UTF8; - -statement ok -drop table large_array_intersect_table_1D_UTF8; - -statement ok -drop table array_intersect_table_2D; - -statement ok -drop table large_array_intersect_table_2D; - -statement ok -drop table array_intersect_table_2D_float; - -statement ok -drop table large_array_intersect_table_2D_float; - -statement ok -drop table array_intersect_table_3D; - -statement ok -drop table large_array_intersect_table_3D; - -statement ok -drop table fixed_size_array_has_table_1D; - -statement ok -drop table fixed_size_array_has_table_1D_Float; - -statement ok -drop table fixed_size_array_has_table_1D_Boolean; - -statement ok -drop table fixed_size_array_has_table_1D_UTF8; - -statement ok -drop table fixed_size_array_has_table_2D; - -statement ok -drop table fixed_size_array_has_table_2D_float; - -statement ok -drop table fixed_size_array_has_table_3D; - -statement ok -drop table arrays_range; - -statement ok -drop table arrays_with_repeating_elements; - -statement ok -drop table 
large_arrays_with_repeating_elements; - -statement ok -drop table fixed_arrays_with_repeating_elements; - -statement ok -drop table nested_arrays_with_repeating_elements; - -statement ok -drop table large_nested_arrays_with_repeating_elements; - -statement ok -drop table fixed_size_nested_arrays_with_repeating_elements; - -statement ok -drop table flatten_table; - -statement ok -drop table large_flatten_table; - -statement ok -drop table fixed_size_flatten_table; - -statement ok -drop table arrays_values_without_nulls; - -statement ok -drop table large_arrays_values_without_nulls; - -statement ok -drop table fixed_size_arrays_values_without_nulls; - -statement ok -drop table test_create_array_table; - -statement ok -drop table values_all_empty; - -statement ok -drop table fixed_size_col_table; - -statement ok -drop table array_has; From a4060fe87e641f8694cb34b9e9b40fe633deb173 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 10:57:58 +0200 Subject: [PATCH 62/70] Add table ref to ListingTableUrl --- datafusion/core/src/execution/context/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index d0b8f368cdfb3..2362ed33e14fd 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1759,7 +1759,9 @@ impl SessionContext { provided_schema: Option, sql_definition: Option, ) -> Result<()> { - let table_path = ListingTableUrl::parse(table_path)?; + let table_ref = table_ref.into(); + let table_path = + ListingTableUrl::parse(table_path)?.with_table_ref(table_ref.clone()); let resolved_schema = match provided_schema { Some(s) => s, None => options.infer_schema(&self.state(), &table_path).await?, From 7808b90ebd2359118db74a784a51d34e3bc5cd2a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 11:30:39 +0200 Subject: [PATCH 63/70] Add heapsize for table-scoped-path --- 
datafusion/common/src/heap_size.rs | 8 +++++++- .../execution/src/cache/file_statistics_cache.rs | 10 +++++----- .../execution/src/cache/list_files_cache.rs | 15 +++++++++++---- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index b744225681450..3049530550a07 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -16,7 +16,7 @@ // under the License. use crate::stats::Precision; -use crate::{ColumnStatistics, ScalarValue, Statistics}; +use crate::{ColumnStatistics, ScalarValue, Statistics, TableReference}; use arrow::array::{ Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, MapArray, StructArray, @@ -51,6 +51,12 @@ impl DFHeapSize for Statistics { } } +impl DFHeapSize for TableReference { + fn heap_size(&self) -> usize { + self.table().heap_size() + } +} + impl DFHeapSize for Precision { diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 1faeff4fa7a93..5a53fb4ca94e8 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -100,7 +100,7 @@ impl DefaultFileStatisticsCacheState { key: &TableScopedPath, value: CachedFileMetadata, ) -> Option { - let key_size = key.path.as_ref().heap_size(); + let key_size = key.heap_size(); let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { @@ -111,11 +111,11 @@ impl DefaultFileStatisticsCacheState { let old_value = self.lru_queue.put(key.clone(), value); self.memory_used += entry_size; - self.memory_used += key.path.as_ref().heap_size(); + self.memory_used += key.heap_size(); if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); - self.memory_used -= key.path.as_ref().heap_size(); + self.memory_used -= key.heap_size(); } self.evict_entries(); @@ -125,7 +125,7 
@@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &TableScopedPath) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.path.as_ref().heap_size(); + self.memory_used -= k.heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -149,7 +149,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.path.as_ref().heap_size(); + self.memory_used -= removed.0.heap_size(); self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index b1b8e6b500169..92aad904273d2 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -22,16 +22,17 @@ use std::{ time::Duration, }; -use datafusion_common::TableReference; -use datafusion_common::instant::Instant; -use object_store::{ObjectMeta, path::Path}; - use crate::cache::{ CacheAccessor, cache_manager::{CachedFileList, ListFilesCache}, lru_queue::LruQueue, }; +use datafusion_common::TableReference; +use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::instant::Instant; +use object_store::{ObjectMeta, path::Path}; + pub trait TimeProvider: Send + Sync + 'static { fn now(&self) -> Instant; } @@ -169,6 +170,12 @@ impl Default for DefaultListFilesCacheState { } } +impl DFHeapSize for TableScopedPath { + fn heap_size(&self) -> usize { + self.path.as_ref().heap_size() + self.table.heap_size() + } +} + impl DefaultListFilesCacheState { fn new(memory_limit: usize, ttl: Option) -> Self { Self { From b3c283d2255e764809de116dc24681fb273bc38e Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 11:47:34 +0200 Subject: [PATCH 64/70] Make list_entries 
table-scoped --- datafusion/execution/src/cache/cache_manager.rs | 4 +--- .../execution/src/cache/file_statistics_cache.rs | 8 +++----- datafusion/execution/src/cache/list_files_cache.rs | 12 +++++++++++- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 066876945f995..251faf7c2f8e1 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -105,7 +105,7 @@ pub trait FileStatisticsCache: fn update_cache_limit(&self, limit: usize); /// Retrieves the information about the entries currently cached. - fn list_entries(&self) -> HashMap; + fn list_entries(&self) -> HashMap; fn drop_table_entries(&self, table_ref: &Option) -> Result<()>; } @@ -137,8 +137,6 @@ pub struct FileStatisticsCacheEntry { pub statistics_size_bytes: usize, /// Whether ordering information is cached for this file. pub has_ordering: bool, - /// Reference to the table associated with this statistics entry. - pub table_reference: Option, } /// Cached file listing. 
diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 5a53fb4ca94e8..fd787f2dc7544 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -19,7 +19,6 @@ use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; use crate::cache::{CacheAccessor, TableScopedPath}; -use object_store::path::Path; use std::collections::HashMap; use std::sync::Mutex; @@ -221,13 +220,13 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { state.evict_entries(); } - fn list_entries(&self) -> HashMap { - let mut entries = HashMap::::new(); + fn list_entries(&self) -> HashMap { + let mut entries = HashMap::::new(); for entry in self.state.lock().unwrap().lru_queue.list_entries() { let path = entry.0.clone(); let cached = entry.1; entries.insert( - path.path, + path, FileStatisticsCacheEntry { object_meta: cached.meta.clone(), num_rows: cached.statistics.num_rows, @@ -235,7 +234,6 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { table_size_bytes: cached.statistics.total_byte_size, statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), - table_reference: path.table, }, ); } diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index 92aad904273d2..c173ec855320a 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -21,7 +21,7 @@ use std::{ sync::{Arc, Mutex}, time::Duration, }; - +use std::fmt::{Debug, Display, Formatter}; use crate::cache::{ CacheAccessor, cache_manager::{CachedFileList, ListFilesCache}, @@ -176,6 +176,16 @@ impl DFHeapSize for TableScopedPath { } } +impl Display for TableScopedPath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if let Some(table) = &self.table { + 
write!(f, "({}, {})", self.path, table) + } else { + write!(f, "({})", self.path) + } + } +} + impl DefaultListFilesCacheState { fn new(memory_limit: usize, ttl: Option) -> Self { Self { From 97888f6a0abfcffb841edc467b5c45da0fcc267e Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 11:57:54 +0200 Subject: [PATCH 65/70] fixup! Make list_entries table-scoped --- .../execution/src/cache/file_statistics_cache.rs | 11 +++++------ datafusion/execution/src/cache/list_files_cache.rs | 13 +++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index fd787f2dc7544..eebe8124c7cba 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -275,6 +275,7 @@ mod tests { use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use object_store::ObjectMeta; + use object_store::path::Path; use std::sync::Arc; fn create_test_meta(path: &str, size: u64) -> ObjectMeta { @@ -351,7 +352,7 @@ mod tests { table: None, }; - let entry = entries.get(&path_3.path).unwrap(); + let entry = entries.get(&path_3).unwrap(); assert_eq!(entry.object_meta.size, 2048); // Should be updated value } @@ -442,7 +443,7 @@ mod tests { // Verify list_entries shows has_ordering = true let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - assert!(entries.get(&path.path).unwrap().has_ordering); + assert!(entries.get(&path).unwrap().has_ordering); } #[test] @@ -587,7 +588,7 @@ mod tests { entries, HashMap::from([ ( - Path::from("test1.parquet"), + path_1, FileStatisticsCacheEntry { object_meta: meta1, num_rows: Precision::Absent, @@ -595,11 +596,10 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: false, - table_reference: None, } ), ( - 
Path::from("test2.parquet"), + path_2, FileStatisticsCacheEntry { object_meta: meta2, num_rows: Precision::Absent, @@ -607,7 +607,6 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: true, - table_reference: None, } ), ]) diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index c173ec855320a..3cccf7e6421b5 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -15,18 +15,19 @@ // specific language governing permissions and limitations // under the License. +use crate::cache::{ + CacheAccessor, + cache_manager::{CachedFileList, ListFilesCache}, + lru_queue::LruQueue, +}; + +use std::fmt::{Debug, Display, Formatter}; use std::mem::size_of; use std::{ collections::HashMap, sync::{Arc, Mutex}, time::Duration, }; -use std::fmt::{Debug, Display, Formatter}; -use crate::cache::{ - CacheAccessor, - cache_manager::{CachedFileList, ListFilesCache}, - lru_queue::LruQueue, -}; use datafusion_common::TableReference; use datafusion_common::heap_size::DFHeapSize; From 613817839999957a6ad44090ce1260ffb3563056 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 12:09:07 +0200 Subject: [PATCH 66/70] fixup! fixup! 
Make list_entries table-scoped --- datafusion/execution/src/cache/list_files_cache.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index 3cccf7e6421b5..e04bb9340b09b 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -180,9 +180,9 @@ impl DFHeapSize for TableScopedPath { impl Display for TableScopedPath { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { if let Some(table) = &self.table { - write!(f, "({}, {})", self.path, table) + write!(f, "{}, {}", self.path, table) } else { - write!(f, "({})", self.path) + write!(f, "{}", self.path) } } } From 492b00d5399a948e11a152096b0919c7d37f7e89 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Sun, 26 Apr 2026 08:02:41 +0200 Subject: [PATCH 67/70] Improve heap size estimation for Arc --- datafusion/common/src/heap_size.rs | 331 ++++++++++-------- .../execution/src/cache/cache_manager.rs | 16 +- .../src/cache/file_statistics_cache.rs | 48 +-- .../execution/src/cache/list_files_cache.rs | 6 +- 4 files changed, 223 insertions(+), 178 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 3049530550a07..2855091c08e2a 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -27,6 +27,7 @@ use arrow::datatypes::{ }; use chrono::{DateTime, Utc}; use half::f16; +use hashbrown::HashSet; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; @@ -40,103 +41,110 @@ pub trait DFHeapSize { /// /// Note that the size of the type itself is not included in the result -- /// instead, that size is added by the caller (e.g. container). 
- fn heap_size(&self) -> usize; + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize; +} + +#[derive(Default)] +pub struct DFHeapSizeCtx { + seen: HashSet, } impl DFHeapSize for Statistics { - fn heap_size(&self) -> usize { - self.num_rows.heap_size() - + self.total_byte_size.heap_size() - + self.column_statistics.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.num_rows.heap_size(ctx) + + self.total_byte_size.heap_size(ctx) + + self.column_statistics.heap_size(ctx) } } impl DFHeapSize for TableReference { - fn heap_size(&self) -> usize { - self.table().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.table().heap_size(ctx) } } impl DFHeapSize for Precision { - fn heap_size(&self) -> usize { - self.get_value().map_or_else(|| 0, |v| v.heap_size()) + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.get_value().map_or_else(|| 0, |v| v.heap_size(ctx)) } } impl DFHeapSize for ColumnStatistics { - fn heap_size(&self) -> usize { - self.null_count.heap_size() - + self.max_value.heap_size() - + self.min_value.heap_size() - + self.sum_value.heap_size() - + self.distinct_count.heap_size() - + self.byte_size.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.null_count.heap_size(ctx) + + self.max_value.heap_size(ctx) + + self.min_value.heap_size(ctx) + + self.sum_value.heap_size(ctx) + + self.distinct_count.heap_size(ctx) + + self.byte_size.heap_size(ctx) } } impl DFHeapSize for ScalarValue { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { use crate::scalar::ScalarValue::*; match self { Null => 0, - Boolean(b) => b.heap_size(), - Float16(f) => f.heap_size(), - Float32(f) => f.heap_size(), - Float64(f) => f.heap_size(), - Decimal32(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Decimal64(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Decimal128(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - 
Decimal256(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Int8(i) => i.heap_size(), - Int16(i) => i.heap_size(), - Int32(i) => i.heap_size(), - Int64(i) => i.heap_size(), - UInt8(u) => u.heap_size(), - UInt16(u) => u.heap_size(), - UInt32(u) => u.heap_size(), - UInt64(u) => u.heap_size(), - Utf8(u) => u.heap_size(), - Utf8View(u) => u.heap_size(), - LargeUtf8(l) => l.heap_size(), - Binary(b) => b.heap_size(), - BinaryView(b) => b.heap_size(), - FixedSizeBinary(a, b) => a.heap_size() + b.heap_size(), - LargeBinary(l) => l.heap_size(), - FixedSizeList(f) => f.heap_size(), - List(l) => l.heap_size(), - LargeList(l) => l.heap_size(), - Struct(s) => s.heap_size(), - Map(m) => m.heap_size(), - Date32(d) => d.heap_size(), - Date64(d) => d.heap_size(), - Time32Second(t) => t.heap_size(), - Time32Millisecond(t) => t.heap_size(), - Time64Microsecond(t) => t.heap_size(), - Time64Nanosecond(t) => t.heap_size(), - TimestampSecond(a, b) => a.heap_size() + b.heap_size(), - TimestampMillisecond(a, b) => a.heap_size() + b.heap_size(), - TimestampMicrosecond(a, b) => a.heap_size() + b.heap_size(), - TimestampNanosecond(a, b) => a.heap_size() + b.heap_size(), - IntervalYearMonth(i) => i.heap_size(), - IntervalDayTime(i) => i.heap_size(), - IntervalMonthDayNano(i) => i.heap_size(), - DurationSecond(d) => d.heap_size(), - DurationMillisecond(d) => d.heap_size(), - DurationMicrosecond(d) => d.heap_size(), - DurationNanosecond(d) => d.heap_size(), - Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Dictionary(a, b) => a.heap_size() + b.heap_size(), - RunEndEncoded(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - ListView(a) => a.heap_size(), - LargeListView(a) => a.heap_size(), + Boolean(b) => b.heap_size(ctx), + Float16(f) => f.heap_size(ctx), + Float32(f) => f.heap_size(ctx), + Float64(f) => f.heap_size(ctx), + Decimal32(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Decimal64(a, b, c) => a.heap_size(ctx) + 
b.heap_size(ctx) + c.heap_size(ctx), + Decimal128(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Decimal256(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Int8(i) => i.heap_size(ctx), + Int16(i) => i.heap_size(ctx), + Int32(i) => i.heap_size(ctx), + Int64(i) => i.heap_size(ctx), + UInt8(u) => u.heap_size(ctx), + UInt16(u) => u.heap_size(ctx), + UInt32(u) => u.heap_size(ctx), + UInt64(u) => u.heap_size(ctx), + Utf8(u) => u.heap_size(ctx), + Utf8View(u) => u.heap_size(ctx), + LargeUtf8(l) => l.heap_size(ctx), + Binary(b) => b.heap_size(ctx), + BinaryView(b) => b.heap_size(ctx), + FixedSizeBinary(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + LargeBinary(l) => l.heap_size(ctx), + FixedSizeList(f) => f.heap_size(ctx), + List(l) => l.heap_size(ctx), + LargeList(l) => l.heap_size(ctx), + Struct(s) => s.heap_size(ctx), + Map(m) => m.heap_size(ctx), + Date32(d) => d.heap_size(ctx), + Date64(d) => d.heap_size(ctx), + Time32Second(t) => t.heap_size(ctx), + Time32Millisecond(t) => t.heap_size(ctx), + Time64Microsecond(t) => t.heap_size(ctx), + Time64Nanosecond(t) => t.heap_size(ctx), + TimestampSecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + TimestampMillisecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + TimestampMicrosecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + TimestampNanosecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + IntervalYearMonth(i) => i.heap_size(ctx), + IntervalDayTime(i) => i.heap_size(ctx), + IntervalMonthDayNano(i) => i.heap_size(ctx), + DurationSecond(d) => d.heap_size(ctx), + DurationMillisecond(d) => d.heap_size(ctx), + DurationMicrosecond(d) => d.heap_size(ctx), + DurationNanosecond(d) => d.heap_size(ctx), + Union(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Dictionary(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + RunEndEncoded(a, b, c) => { + a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx) + } + ListView(a) => a.heap_size(ctx), + LargeListView(a) 
=> a.heap_size(ctx), } } } impl DFHeapSize for DataType { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { use DataType::*; match self { Null => 0, @@ -152,50 +160,50 @@ impl DFHeapSize for DataType { Float16 => 0, Float32 => 0, Float64 => 0, - Timestamp(t, s) => t.heap_size() + s.heap_size(), + Timestamp(t, s) => t.heap_size(ctx) + s.heap_size(ctx), Date32 => 0, Date64 => 0, - Time32(t) => t.heap_size(), - Time64(t) => t.heap_size(), - Duration(t) => t.heap_size(), - Interval(i) => i.heap_size(), + Time32(t) => t.heap_size(ctx), + Time64(t) => t.heap_size(ctx), + Duration(t) => t.heap_size(ctx), + Interval(i) => i.heap_size(ctx), Binary => 0, - FixedSizeBinary(i) => i.heap_size(), + FixedSizeBinary(i) => i.heap_size(ctx), LargeBinary => 0, BinaryView => 0, Utf8 => 0, LargeUtf8 => 0, Utf8View => 0, - List(v) => v.heap_size(), - ListView(v) => v.heap_size(), - FixedSizeList(f, i) => f.heap_size() + i.heap_size(), - LargeList(l) => l.heap_size(), - LargeListView(l) => l.heap_size(), - Struct(s) => s.heap_size(), - Union(u, m) => u.heap_size() + m.heap_size(), - Dictionary(a, b) => a.heap_size() + b.heap_size(), - Decimal32(p, s) => p.heap_size() + s.heap_size(), - Decimal64(p, s) => p.heap_size() + s.heap_size(), - Decimal128(p, s) => p.heap_size() + s.heap_size(), - Decimal256(p, s) => p.heap_size() + s.heap_size(), - Map(m, b) => m.heap_size() + b.heap_size(), - RunEndEncoded(a, b) => a.heap_size() + b.heap_size(), + List(v) => v.heap_size(ctx), + ListView(v) => v.heap_size(ctx), + FixedSizeList(f, i) => f.heap_size(ctx) + i.heap_size(ctx), + LargeList(l) => l.heap_size(ctx), + LargeListView(l) => l.heap_size(ctx), + Struct(s) => s.heap_size(ctx), + Union(u, m) => u.heap_size(ctx) + m.heap_size(ctx), + Dictionary(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + Decimal32(p, s) => p.heap_size(ctx) + s.heap_size(ctx), + Decimal64(p, s) => p.heap_size(ctx) + s.heap_size(ctx), + Decimal128(p, s) => p.heap_size(ctx) + 
s.heap_size(ctx), + Decimal256(p, s) => p.heap_size(ctx) + s.heap_size(ctx), + Map(m, b) => m.heap_size(ctx) + b.heap_size(ctx), + RunEndEncoded(a, b) => a.heap_size(ctx) + b.heap_size(ctx), } } } impl DFHeapSize for Vec { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { let item_size = size_of::(); // account for the contents of the Vec (self.capacity() * item_size) + // add any heap allocations by contents - self.iter().map(|t| t.heap_size()).sum::() + self.iter().map(|t| t.heap_size(ctx)).sum::() } } impl DFHeapSize for HashMap { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { let capacity = self.capacity(); if capacity == 0 { return 0; @@ -231,86 +239,92 @@ impl DFHeapSize for HashMap { group_size + (buckets * (key_val_size + metadata_size)) - + self.keys().map(|k| k.heap_size()).sum::() - + self.values().map(|v| v.heap_size()).sum::() + + self.keys().map(|k| k.heap_size(ctx)).sum::() + + self.values().map(|v| v.heap_size(ctx)).sum::() } } impl DFHeapSize for Arc { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + let ptr = Arc::as_ptr(self) as usize; + + if !ctx.seen.insert(ptr) { + return 0; + } + // Arc stores weak and strong counts on the heap alongside an instance of T - 2 * size_of::() + self.as_ref().heap_size() + 2 * size_of::() + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Arc { - fn heap_size(&self) -> usize { - 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Fields { - fn heap_size(&self) -> usize { - self.into_iter().map(|f| f.heap_size()).sum::() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.into_iter().map(|f| f.heap_size(ctx)).sum::() } } impl DFHeapSize for StructArray { - fn heap_size(&self) -> usize { + fn 
heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for LargeListArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for LargeListViewArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for ListArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for ListViewArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for FixedSizeListArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for MapArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for Arc { - fn heap_size(&self) -> usize { - 2 * size_of::() + self.as_ref().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + 2 * size_of::() + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Box { - fn heap_size(&self) -> usize { - size_of::() + self.as_ref().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + size_of::() + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Option { - fn heap_size(&self) -> usize { - self.as_ref().map(|inner| inner.heap_size()).unwrap_or(0) + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.as_ref().map(|inner| inner.heap_size(ctx)).unwrap_or(0) } } @@ -319,158 +333,181 @@ where A: DFHeapSize, B: DFHeapSize, { - fn heap_size(&self) -> usize { - self.0.heap_size() + self.1.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.0.heap_size(ctx) + self.1.heap_size(ctx) } } impl DFHeapSize for String { - fn 
heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.capacity() } } impl DFHeapSize for str { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.len() } } impl DFHeapSize for UnionFields { - fn heap_size(&self) -> usize { - self.iter().map(|f| f.0.heap_size() + f.1.heap_size()).sum() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.iter() + .map(|f| f.0.heap_size(ctx) + f.1.heap_size(ctx)) + .sum() } } impl DFHeapSize for UnionMode { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for TimeUnit { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for IntervalUnit { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for Field { - fn heap_size(&self) -> usize { - self.name().heap_size() - + self.data_type().heap_size() - + self.is_nullable().heap_size() - + self.dict_is_ordered().heap_size() - + self.metadata().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.name().heap_size(ctx) + + self.data_type().heap_size(ctx) + + self.is_nullable().heap_size(ctx) + + self.dict_is_ordered().heap_size(ctx) + + self.metadata().heap_size(ctx) } } impl DFHeapSize for IntervalMonthDayNano { - fn heap_size(&self) -> usize { - self.days.heap_size() + self.months.heap_size() + self.nanoseconds.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.days.heap_size(ctx) + + self.months.heap_size(ctx) + + self.nanoseconds.heap_size(ctx) } } impl DFHeapSize for IntervalDayTime { - fn heap_size(&self) -> usize { - self.days.heap_size() + self.milliseconds.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.days.heap_size(ctx) + self.milliseconds.heap_size(ctx) } } impl 
DFHeapSize for DateTime { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for bool { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u8 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u16 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u32 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u64 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i8 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i16 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i32 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i64 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i128 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i256 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for f16 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for f32 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no 
heap allocations } } impl DFHeapSize for f64 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for usize { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } + +#[test] +fn test_heap_size_arc_avoid_double_accounting() { + let a1 = Arc::new(vec![1, 2, 3]); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size = a1.heap_size(&mut ctx); + + let a2 = Arc::clone(&a1); + let a3 = Arc::clone(&a1); + let a4 = Arc::clone(&a3); + + let mut ctx = DFHeapSizeCtx::default(); + let heap_size_with_clones = a1.heap_size(&mut ctx) + + a2.heap_size(&mut ctx) + + a3.heap_size(&mut ctx) + + a4.heap_size(&mut ctx); + + assert_eq!(heap_size, heap_size_with_clones); +} diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 251faf7c2f8e1..09861ddf6451e 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -24,7 +24,7 @@ use crate::cache::file_statistics_cache::{ use crate::cache::list_files_cache::ListFilesEntry; use crate::cache::list_files_cache::TableScopedPath; use datafusion_common::TableReference; -use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx}; use datafusion_common::stats::Precision; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -111,13 +111,13 @@ pub trait FileStatisticsCache: } impl DFHeapSize for CachedFileMetadata { - fn heap_size(&self) -> usize { - self.meta.size.heap_size() - + self.meta.last_modified.heap_size() - + self.meta.version.heap_size() - + self.meta.e_tag.heap_size() - + self.meta.location.as_ref().heap_size() - + self.statistics.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.meta.size.heap_size(ctx) + + 
self.meta.last_modified.heap_size(ctx) + + self.meta.version.heap_size(ctx) + + self.meta.e_tag.heap_size(ctx) + + self.meta.location.as_ref().heap_size(ctx) + + self.statistics.heap_size(ctx) //TODO add ordering once LexOrdering/PhysicalExpr implements DFHeapSize } } diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index eebe8124c7cba..fe9b0edccc9a2 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -25,7 +25,7 @@ use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; use datafusion_common::TableReference; -use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx}; /// Default implementation of [`FileStatisticsCache`] /// @@ -99,8 +99,9 @@ impl DefaultFileStatisticsCacheState { key: &TableScopedPath, value: CachedFileMetadata, ) -> Option { - let key_size = key.heap_size(); - let entry_size = value.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + let key_size = key.heap_size(&mut ctx); + let entry_size = value.heap_size(&mut ctx); if entry_size + key_size > self.memory_limit { // Remove potential stale entry @@ -110,11 +111,11 @@ impl DefaultFileStatisticsCacheState { let old_value = self.lru_queue.put(key.clone(), value); self.memory_used += entry_size; - self.memory_used += key.heap_size(); + self.memory_used += key.heap_size(&mut ctx); if let Some(old_entry) = &old_value { - self.memory_used -= old_entry.heap_size(); - self.memory_used -= key.heap_size(); + self.memory_used -= old_entry.heap_size(&mut ctx); + self.memory_used -= key.heap_size(&mut ctx); } self.evict_entries(); @@ -124,8 +125,9 @@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &TableScopedPath) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.heap_size(); - 
self.memory_used -= old_entry.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + self.memory_used -= k.heap_size(&mut ctx); + self.memory_used -= old_entry.heap_size(&mut ctx); Some(old_entry) } else { None @@ -148,8 +150,9 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.heap_size(); - self.memory_used -= removed.1.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + self.memory_used -= removed.0.heap_size(&mut ctx); + self.memory_used -= removed.1.heap_size(&mut ctx); } else { // cache is empty while memory_used > memory_limit, cannot happen log::error!( @@ -222,6 +225,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { fn list_entries(&self) -> HashMap { let mut entries = HashMap::::new(); + let mut ctx = DFHeapSizeCtx::default(); for entry in self.state.lock().unwrap().lru_queue.list_entries() { let path = entry.0.clone(); let cached = entry.1; @@ -232,7 +236,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { num_rows: cached.statistics.num_rows, num_columns: cached.statistics.column_statistics.len(), table_size_bytes: cached.statistics.total_byte_size, - statistics_size_bytes: cached.statistics.heap_size(), + statistics_size_bytes: cached.statistics.heap_size(&mut ctx), has_ordering: cached.ordering.is_some(), }, ); @@ -269,6 +273,7 @@ mod tests { use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use chrono::DateTime; + use datafusion_common::heap_size::DFHeapSizeCtx; use datafusion_common::stats::Precision; use datafusion_common::{ColumnStatistics, ScalarValue, Statistics}; use datafusion_expr::ColumnarValue; @@ -619,10 +624,12 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries 
= meta_1.location.as_ref().heap_size() - + value_1.heap_size() - + meta_2.location.as_ref().heap_size() - + value_2.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + + let limit_for_2_entries = meta_1.location.as_ref().heap_size(&mut ctx) + + value_1.heap_size(&mut ctx) + + meta_2.location.as_ref().heap_size(&mut ctx) + + value_2.heap_size(&mut ctx); // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); @@ -672,11 +679,12 @@ mod tests { cache.put(&path_3, value_3.clone()); assert_eq!(cache.memory_used(), limit_for_2_entries); + let mut ctx = DFHeapSizeCtx::default(); cache.remove(&path_2); assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), - meta_3.location.as_ref().heap_size() + value_3.heap_size() + meta_3.location.as_ref().heap_size(&mut ctx) + value_3.heap_size(&mut ctx) ); cache.clear(); @@ -687,8 +695,8 @@ mod tests { #[test] fn test_cache_rejects_entry_which_is_too_large() { let (meta, value) = create_cached_file_metadata_with_stats("test1.parquet"); - - let limit_less_than_the_entry = value.heap_size() - 1; + let mut ctx = DFHeapSizeCtx::default(); + let limit_less_than_the_entry = value.heap_size(&mut ctx) - 1; // create a cache with a size less than the entry let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); @@ -727,8 +735,8 @@ mod tests { total_byte_size: Precision::Exact(100), column_statistics: vec![column_statistics.clone()], }; - - let object_meta = create_test_meta(file_name, stats.heap_size() as u64); + let mut ctx = DFHeapSizeCtx::default(); + let object_meta = create_test_meta(file_name, stats.heap_size(&mut ctx) as u64); let value = CachedFileMetadata::new(object_meta.clone(), Arc::new(stats.clone()), None); (object_meta, value) diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index e04bb9340b09b..a3cdf7c5e9110 100644 --- 
a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -30,7 +30,7 @@ use std::{ }; use datafusion_common::TableReference; -use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx}; use datafusion_common::instant::Instant; use object_store::{ObjectMeta, path::Path}; @@ -172,8 +172,8 @@ impl Default for DefaultListFilesCacheState { } impl DFHeapSize for TableScopedPath { - fn heap_size(&self) -> usize { - self.path.as_ref().heap_size() + self.table.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.path.as_ref().heap_size(ctx) + self.table.heap_size(ctx) } } From 077187ea542b317868fc4a3cb31d5cf9b7ab428a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 27 Apr 2026 10:13:28 +0200 Subject: [PATCH 68/70] fixup! Improve heap size estimation for Arc --- datafusion/common/src/heap_size.rs | 80 ++++++++++++++----- .../src/cache/file_statistics_cache.rs | 1 + 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 2855091c08e2a..285241c3a6ba4 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -246,7 +246,20 @@ impl DFHeapSize for HashMap { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - let ptr = Arc::as_ptr(self) as usize; + let ptr = Arc::as_ptr(self) as *const i32 as usize; + + if !ctx.seen.insert(ptr) { + return 0; + } + + // Arc stores weak and strong counts on the heap alongside an instance of T + 2 * size_of::() + self.as_ref().heap_size(ctx) + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + let ptr = Arc::as_ptr(self) as *const i32 as usize; if !ctx.seen.insert(ptr) { return 0; @@ -259,6 +272,13 @@ impl DFHeapSize for Arc { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + let ptr = 
Arc::as_ptr(self) as *const i32 as usize; + + if !ctx.seen.insert(ptr) { + return 0; + } + + // Arc stores weak and strong counts on the heap alongside an instance of T 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size(ctx) } } @@ -310,12 +330,6 @@ impl DFHeapSize for MapArray { } } -impl DFHeapSize for Arc { - fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - 2 * size_of::() + self.as_ref().heap_size(ctx) - } -} - impl DFHeapSize for Box { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { size_of::() + self.as_ref().heap_size(ctx) @@ -493,21 +507,45 @@ impl DFHeapSize for usize { } } -#[test] -fn test_heap_size_arc_avoid_double_accounting() { - let a1 = Arc::new(vec![1, 2, 3]); - let mut ctx = DFHeapSizeCtx::default(); - let heap_size = a1.heap_size(&mut ctx); +#[cfg(test)] +mod tests { + use super::*; - let a2 = Arc::clone(&a1); - let a3 = Arc::clone(&a1); - let a4 = Arc::clone(&a3); + #[test] + fn test_heap_size_arc_avoid_double_accounting() { + let a1 = Arc::new(vec![1, 2, 3]); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size = a1.heap_size(&mut ctx); - let mut ctx = DFHeapSizeCtx::default(); - let heap_size_with_clones = a1.heap_size(&mut ctx) - + a2.heap_size(&mut ctx) - + a3.heap_size(&mut ctx) - + a4.heap_size(&mut ctx); + let a2 = Arc::clone(&a1); + let a3 = Arc::clone(&a1); + let a4 = Arc::clone(&a3); - assert_eq!(heap_size, heap_size_with_clones); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size_with_clones = a1.heap_size(&mut ctx) + + a2.heap_size(&mut ctx) + + a3.heap_size(&mut ctx) + + a4.heap_size(&mut ctx); + + assert_eq!(heap_size, heap_size_with_clones); + } + + #[test] + fn test_heap_size_arc_str_avoid_double_accounting() { + let a1 = Arc::new("Hello".to_string()); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size = a1.heap_size(&mut ctx); + + let a2 = Arc::clone(&a1); + let a3 = Arc::clone(&a1); + let a4 = Arc::clone(&a3); + + let mut ctx = DFHeapSizeCtx::default(); + let 
heap_size_with_clones = a1.heap_size(&mut ctx) + + a2.heap_size(&mut ctx) + + a3.heap_size(&mut ctx) + + a4.heap_size(&mut ctx); + + assert_eq!(heap_size, heap_size_with_clones); + } } diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index fe9b0edccc9a2..f7e675c9f395d 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -110,6 +110,7 @@ impl DefaultFileStatisticsCacheState { } let old_value = self.lru_queue.put(key.clone(), value); + let mut ctx = DFHeapSizeCtx::default(); self.memory_used += entry_size; self.memory_used += key.heap_size(&mut ctx); From 9d1bce23e9a175b2b7bf3f9a9f4b1308620940a9 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 27 Apr 2026 11:34:06 +0200 Subject: [PATCH 69/70] Update migration guide --- .../library-user-guide/upgrading/54.0.0.md | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index cadd365e1f814..1753965d79fc1 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -380,3 +380,42 @@ impl Default for MyTreeNode { } } ``` + +[20047]: https://github.com/apache/datafusion/pull/20047 + +### File statistics cache is now memory-limited and managed by the `CacheManager` + +The file statistics cache used by `ListingTable` is now memory-limited and +centrally managed through the `CacheManager`. + +To configure the cache size use the `file_statistics_cache_limit` setting: + +```sql +SET datafusion.runtime.file_statistics_cache_limit = '10MB' +``` + +To disable the file statistics cache, set the limit to 0. + +The file statistics cache is no longer created inside the `ListingTable`. +Instead, it is created within the `CacheManager` and must be passed to `ListingTable`. 
+ +**Who is affected:** + +- Users who want to limit the memory usage of the file statistics cache. +- Users who want to disable the file statistics cache. +- Users creating a `ListingTable` programmatically with a file statistics cache + +**Migration guide:** + +Disable the cache by setting the configuration value to 0: + +```sql +SET datafusion.runtime.file_statistics_cache_limit = '0k' +``` + +Use the file statistics cache provided by the CacheManager when initializing a new ListingTable: + +```rust,ignore +ListingTable::try_new(config)? + .with_cache(ctx.runtime_env().cache_manager.get_file_statistic_cache(),) +``` From 5d3dd451250495171c363e29b64a10d29b1cc3b9 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 27 Apr 2026 11:48:35 +0200 Subject: [PATCH 70/70] fixup! Update migration guide --- docs/source/library-user-guide/upgrading/54.0.0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 1753965d79fc1..2921163c9537c 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -403,7 +403,7 @@ Instead, it is created within the `CacheManager` and must be passed to `ListingT - Users who want to limit the memory usage of the file statistics cache. - Users who want to disable the file statistics cache. -- Users creating a `ListingTable` programmatically with a file statistics cache +- Users creating a `ListingTable` programmatically with a file statistics cache. **Migration guide:**