From 567186c88dee0e8181b5ed0541b66219923ca329 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 11:42:27 +0000 Subject: [PATCH 1/7] chore(deps): update rand requirement from 0.8 to 0.9 Updates the requirements on [rand](https://github.com/rust-random/rand) to permit the latest version. - [Release notes](https://github.com/rust-random/rand/releases) - [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-random/rand/compare/0.8.5...0.9.0) --- updated-dependencies: - dependency-name: rand dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.toml | 2 +- datafusion/functions-nested/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 55855d09d50ed..801a522f21d49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,7 +142,7 @@ pbjson = { version = "0.7.0" } # Should match arrow-flight's version of prost. prost = "0.13.1" prost-derive = "0.13.1" -rand = "0.8" +rand = "0.9" recursive = "0.1.1" regex = "1.8" rstest = "0.24.0" diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index e7254e4125cb0..7ec8f5267b185 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -59,7 +59,7 @@ paste = "1.0.14" [dev-dependencies] criterion = { version = "0.5", features = ["async_tokio"] } -rand = "0.8.5" +rand = "0.9.0" [[bench]] harness = false From 5be110d070bc8bc9f91f0e05f841f9846b7e1fe6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 08:36:14 +0000 Subject: [PATCH 2/7] chore(deps): update rand_distr requirement from 0.4.3 to 0.5.0 Updates the requirements on [rand_distr](https://github.com/rust-random/rand) to permit the latest version. 
- [Release notes](https://github.com/rust-random/rand/releases) - [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-random/rand/compare/rand_distr-0.4.3...0.5.0) --- updated-dependencies: - dependency-name: rand_distr dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- datafusion/core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 28c54cb444ce0..b0f8f00eac918 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -146,7 +146,7 @@ doc-comment = { workspace = true } env_logger = { workspace = true } paste = "^1.0" rand = { workspace = true, features = ["small_rng"] } -rand_distr = "0.4.3" +rand_distr = "0.5.0" regex = { workspace = true } rstest = { workspace = true } serde_json = { workspace = true } From aed0965d7f5d58ea8134a262efb37fb43cb05567 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 3 Feb 2025 16:35:42 +0100 Subject: [PATCH 3/7] Migrate to 0.9 --- datafusion/common/Cargo.toml | 2 +- datafusion/common/src/scalar/mod.rs | 38 +++++++++--------- datafusion/core/Cargo.toml | 4 +- datafusion/core/benches/data_utils/mod.rs | 17 ++++---- datafusion/core/benches/map_query_sql.rs | 6 +-- datafusion/core/benches/parquet_query_sql.rs | 24 +++++------ datafusion/core/benches/sort.rs | 8 ++-- datafusion/core/benches/sql_query_with_io.rs | 2 +- .../src/datasource/file_format/write/demux.rs | 8 ++-- .../physical_plan/parquet/row_filter.rs | 8 ++-- datafusion/core/tests/dataframe/mod.rs | 8 ++-- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 24 +++++------ .../aggregation_fuzzer/context_generator.rs | 12 +++--- .../aggregation_fuzzer/data_generator.rs | 34 +++++++++------- .../fuzz_cases/aggregation_fuzzer/fuzzer.rs | 26 ++++++------ .../tests/fuzz_cases/equivalence/utils.rs | 6 +-- datafusion/core/tests/fuzz_cases/join_fuzz.rs | 4 +- 
.../core/tests/fuzz_cases/limit_fuzz.rs | 30 +++++++------- datafusion/core/tests/fuzz_cases/pruning.rs | 4 +- datafusion/core/tests/fuzz_cases/sort_fuzz.rs | 6 +-- .../sort_preserving_repartition_fuzz.rs | 14 +++---- .../core/tests/fuzz_cases/window_fuzz.rs | 40 +++++++++---------- datafusion/execution/Cargo.toml | 2 +- datafusion/execution/src/disk_manager.rs | 4 +- .../functions-aggregate-common/Cargo.toml | 2 +- .../groups_accumulator/accumulate.rs | 18 ++++----- datafusion/functions-aggregate/Cargo.toml | 1 - .../functions-aggregate/benches/array_agg.rs | 29 +++++++++----- datafusion/functions-aggregate/src/min_max.rs | 4 +- datafusion/functions-nested/Cargo.toml | 2 +- datafusion/functions-nested/benches/map.rs | 8 ++-- datafusion/functions/Cargo.toml | 2 +- datafusion/functions/benches/date_bin.rs | 4 +- datafusion/functions/benches/find_in_set.rs | 8 ++-- datafusion/functions/benches/helper.rs | 6 +-- datafusion/functions/benches/ltrim.rs | 4 +- datafusion/functions/benches/make_date.rs | 12 +++--- datafusion/functions/benches/pad.rs | 8 ++-- datafusion/functions/benches/regx.rs | 16 ++++---- datafusion/functions/benches/strpos.rs | 10 ++--- datafusion/functions/benches/substr_index.rs | 10 ++--- datafusion/functions/benches/to_char.rs | 8 ++-- datafusion/functions/src/math/random.rs | 4 +- datafusion/physical-expr/benches/in_list.rs | 12 +++--- .../physical-expr/src/intervals/cp_solver.rs | 8 ++-- .../physical-plan/src/joins/test_utils.rs | 4 +- test-utils/Cargo.toml | 2 +- test-utils/src/array_gen/binary.rs | 12 +++--- test-utils/src/array_gen/boolean.rs | 6 +-- test-utils/src/array_gen/decimal.rs | 4 +- test-utils/src/array_gen/primitive.rs | 9 +++-- test-utils/src/array_gen/random_data.rs | 20 +++++----- test-utils/src/array_gen/string.rs | 14 +++---- test-utils/src/data_gen.rs | 33 +++++++-------- test-utils/src/lib.rs | 6 +-- test-utils/src/string_gen.rs | 12 +++--- 56 files changed, 321 insertions(+), 308 deletions(-) diff --git 
a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index fe6d652be700a..dcfcd35b239f5 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -74,4 +74,4 @@ web-time = "1.1.0" [dev-dependencies] chrono = { workspace = true } -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 9d3429b677963..0c1f3c0fe97ef 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -7130,14 +7130,14 @@ mod tests { fn get_random_timestamps(sample_size: u64) -> Vec { let vector_size = sample_size; let mut timestamp = vec![]; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); for i in 0..vector_size { - let year = rng.gen_range(1995..=2050); - let month = rng.gen_range(1..=12); - let day = rng.gen_range(1..=28); // to exclude invalid dates - let hour = rng.gen_range(0..=23); - let minute = rng.gen_range(0..=59); - let second = rng.gen_range(0..=59); + let year = rng.random_range(1995..=2050); + let month = rng.random_range(1..=12); + let day = rng.random_range(1..=28); // to exclude invalid dates + let hour = rng.random_range(0..=23); + let minute = rng.random_range(0..=59); + let second = rng.random_range(0..=59); if i % 4 == 0 { timestamp.push(ScalarValue::TimestampSecond( Some( @@ -7151,7 +7151,7 @@ mod tests { None, )) } else if i % 4 == 1 { - let millisec = rng.gen_range(0..=999); + let millisec = rng.random_range(0..=999); timestamp.push(ScalarValue::TimestampMillisecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -7164,7 +7164,7 @@ mod tests { None, )) } else if i % 4 == 2 { - let microsec = rng.gen_range(0..=999_999); + let microsec = rng.random_range(0..=999_999); timestamp.push(ScalarValue::TimestampMicrosecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -7177,7 +7177,7 @@ mod tests { None, )) } else if i % 4 == 3 { - let nanosec = 
rng.gen_range(0..=999_999_999); + let nanosec = rng.random_range(0..=999_999_999); timestamp.push(ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -7201,27 +7201,27 @@ mod tests { let vector_size = sample_size; let mut intervals = vec![]; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); const SECS_IN_ONE_DAY: i32 = 86_400; const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000; for i in 0..vector_size { if i % 4 == 0 { - let days = rng.gen_range(0..5000); + let days = rng.random_range(0..5000); // to not break second precision - let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000; + let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000; intervals.push(ScalarValue::new_interval_dt(days, millis)); } else if i % 4 == 1 { - let days = rng.gen_range(0..5000); - let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32)); + let days = rng.random_range(0..5000); + let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32)); intervals.push(ScalarValue::new_interval_dt(days, millisec)); } else if i % 4 == 2 { - let days = rng.gen_range(0..5000); + let days = rng.random_range(0..5000); // to not break microsec precision - let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000; + let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000; intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); } else { - let days = rng.gen_range(0..5000); - let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY); + let days = rng.random_range(0..5000); + let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY); intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); } } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index b0f8f00eac918..89528425b38ad 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -124,7 +124,7 @@ num-traits = { version = "0.2", optional = true } object_store = { workspace = true } parking_lot = { workspace = true } parquet = { workspace = true, 
optional = true, default-features = true } -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } regex = { workspace = true } sqlparser = { workspace = true } tempfile = { workspace = true } @@ -145,7 +145,7 @@ datafusion-physical-optimizer = { workspace = true } doc-comment = { workspace = true } env_logger = { workspace = true } paste = "^1.0" -rand = { workspace = true, features = ["small_rng"] } +rand = { workspace = true, features = ["small_rng", "thread_rng"] } rand_distr = "0.5.0" regex = { workspace = true } rstest = { workspace = true } diff --git a/datafusion/core/benches/data_utils/mod.rs b/datafusion/core/benches/data_utils/mod.rs index 9d2864919225a..80b2a9c96a7a0 100644 --- a/datafusion/core/benches/data_utils/mod.rs +++ b/datafusion/core/benches/data_utils/mod.rs @@ -29,8 +29,7 @@ use arrow_array::builder::{Int64Builder, StringBuilder}; use datafusion::datasource::MemTable; use datafusion::error::Result; use datafusion_common::DataFusionError; -use rand::rngs::StdRng; -use rand::seq::SliceRandom; +use rand::{rngs::StdRng, seq::IndexedRandom}; use rand::{Rng, SeedableRng}; use rand_distr::Distribution; use rand_distr::{Normal, Pareto}; @@ -80,10 +79,10 @@ fn create_data(size: usize, null_density: f64) -> Vec> { (0..size) .map(|_| { - if rng.gen::() > null_density { + if rng.random::() > null_density { None } else { - Some(rng.gen::()) + Some(rng.random::()) } }) .collect() @@ -95,10 +94,10 @@ fn create_integer_data(size: usize, value_density: f64) -> Vec> { (0..size) .map(|_| { - if rng.gen::() > value_density { + if rng.random::() > value_density { None } else { - Some(rng.gen::()) + Some(rng.random::()) } }) .collect() @@ -128,7 +127,7 @@ fn create_record_batch( // Integer values between [0, 9]. 
let integer_values_narrow = (0..batch_size) - .map(|_| rng.gen_range(0_u64..10)) + .map(|_| rng.random_range(0_u64..10)) .collect::>(); RecordBatch::try_new( @@ -188,7 +187,7 @@ pub(crate) fn make_data( let mut id_builder = StringBuilder::new(); let mut ts_builder = Int64Builder::new(); let gen_id = |rng: &mut rand::rngs::SmallRng| { - rng.gen::<[u8; 16]>() + rng.random::<[u8; 16]>() .iter() .fold(String::new(), |mut output, b| { let _ = write!(output, "{b:02X}"); @@ -204,7 +203,7 @@ pub(crate) fn make_data( .map(|_| gen_sample_cnt(&mut rng)) .collect::>(); for _ in 0..sample_cnt { - let random_index = rng.gen_range(0..simultaneous_group_cnt); + let random_index = rng.random_range(0..simultaneous_group_cnt); let trace_id = &mut group_ids[random_index]; let sample_cnt = &mut group_sample_cnts[random_index]; *sample_cnt -= 1; diff --git a/datafusion/core/benches/map_query_sql.rs b/datafusion/core/benches/map_query_sql.rs index e4c5f7c5deb3b..13c1891632fdc 100644 --- a/datafusion/core/benches/map_query_sql.rs +++ b/datafusion/core/benches/map_query_sql.rs @@ -34,7 +34,7 @@ mod data_utils; fn build_keys(rng: &mut ThreadRng) -> Vec { let mut keys = vec![]; for _ in 0..1000 { - keys.push(rng.gen_range(0..9999).to_string()); + keys.push(rng.random_range(0..9999).to_string()); } keys } @@ -42,7 +42,7 @@ fn build_keys(rng: &mut ThreadRng) -> Vec { fn build_values(rng: &mut ThreadRng) -> Vec { let mut values = vec![]; for _ in 0..1000 { - values.push(rng.gen_range(0..9999)); + values.push(rng.random_range(0..9999)); } values } @@ -64,7 +64,7 @@ fn criterion_benchmark(c: &mut Criterion) { let rt = Runtime::new().unwrap(); let df = rt.block_on(ctx.lock().table("t")).unwrap(); - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let keys = build_keys(&mut rng); let values = build_values(&mut rng); let mut key_buffer = Vec::new(); diff --git a/datafusion/core/benches/parquet_query_sql.rs b/datafusion/core/benches/parquet_query_sql.rs index 
f82a126c56520..906a6a6e6c986 100644 --- a/datafusion/core/benches/parquet_query_sql.rs +++ b/datafusion/core/benches/parquet_query_sql.rs @@ -29,9 +29,9 @@ use datafusion_common::instant::Instant; use futures::stream::StreamExt; use parquet::arrow::ArrowWriter; use parquet::file::properties::{WriterProperties, WriterVersion}; -use rand::distributions::uniform::SampleUniform; -use rand::distributions::Alphanumeric; -use rand::prelude::*; +use rand::distr::uniform::SampleUniform; +use rand::distr::Alphanumeric; +use rand::{prelude::*, rng}; use std::fs::File; use std::io::Read; use std::ops::Range; @@ -97,13 +97,13 @@ fn generate_string_dictionary( len: usize, valid_percent: f64, ) -> ArrayRef { - let mut rng = thread_rng(); + let mut rng = rng(); let strings: Vec<_> = (0..cardinality).map(|x| format!("{prefix}#{x}")).collect(); Arc::new(DictionaryArray::::from_iter((0..len).map( |_| { - rng.gen_bool(valid_percent) - .then(|| strings[rng.gen_range(0..cardinality)].as_str()) + rng.random_bool(valid_percent) + .then(|| strings[rng.random_range(0..cardinality)].as_str()) }, ))) } @@ -113,10 +113,10 @@ fn generate_strings( len: usize, valid_percent: f64, ) -> ArrayRef { - let mut rng = thread_rng(); + let mut rng = rng(); Arc::new(StringArray::from_iter((0..len).map(|_| { - rng.gen_bool(valid_percent).then(|| { - let string_len = rng.gen_range(string_length_range.clone()); + rng.random_bool(valid_percent).then(|| { + let string_len = rng.random_range(string_length_range.clone()); (0..string_len) .map(|_| char::from(rng.sample(Alphanumeric))) .collect::() @@ -133,10 +133,10 @@ where T: ArrowPrimitiveType, T::Native: SampleUniform, { - let mut rng = thread_rng(); + let mut rng = rng(); Arc::new(PrimitiveArray::::from_iter((0..len).map(|_| { - rng.gen_bool(valid_percent) - .then(|| rng.gen_range(range.clone())) + rng.random_bool(valid_percent) + .then(|| rng.random_range(range.clone())) }))) } diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs 
index 14e80ce364e33..3e743999e3b42 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -488,7 +488,7 @@ impl DataGenerator { /// Create an array of i64 sorted values (where approximately 1/3 values is repeated) fn i64_values(&mut self) -> Vec { let mut vec: Vec<_> = (0..INPUT_SIZE) - .map(|_| self.rng.gen_range(0..INPUT_SIZE as i64)) + .map(|_| self.rng.random_range(0..INPUT_SIZE as i64)) .collect(); vec.sort_unstable(); @@ -513,7 +513,7 @@ impl DataGenerator { // pick from the 100 strings randomly let mut input = (0..INPUT_SIZE) .map(|_| { - let idx = self.rng.gen_range(0..strings.len()); + let idx = self.rng.random_range(0..strings.len()); let s = Arc::clone(&strings[idx]); Some(s) }) @@ -536,7 +536,7 @@ impl DataGenerator { fn random_string(&mut self) -> String { let rng = &mut self.rng; - rng.sample_iter(rand::distributions::Alphanumeric) + rng.sample_iter(rand::distr::Alphanumeric) .filter(|c| c.is_ascii_alphabetic()) .take(20) .map(char::from) @@ -558,7 +558,7 @@ where let mut outputs: Vec>> = (0..NUM_STREAMS).map(|_| Vec::new()).collect(); for i in input { - let stream_idx = rng.gen_range(0..NUM_STREAMS); + let stream_idx = rng.random_range(0..NUM_STREAMS); let stream = &mut outputs[stream_idx]; match stream.last_mut() { Some(x) if x.len() < BATCH_SIZE => x.push(i), diff --git a/datafusion/core/benches/sql_query_with_io.rs b/datafusion/core/benches/sql_query_with_io.rs index aef39a04e47e8..7bc16cb54b4e0 100644 --- a/datafusion/core/benches/sql_query_with_io.rs +++ b/datafusion/core/benches/sql_query_with_io.rs @@ -67,7 +67,7 @@ fn create_parquet_file(rng: &mut StdRng, id_offset: usize) -> Bytes { let mut payload_builder = Int64Builder::new(); for row in 0..FILE_ROWS { id_builder.append_value((row + id_offset) as u64); - payload_builder.append_value(rng.gen()); + payload_builder.append_value(rng.random()); } let batch = RecordBatch::try_new( Arc::clone(&schema), diff --git 
a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index 48db2c0802559..959c963015fe4 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -42,7 +42,7 @@ use datafusion_execution::TaskContext; use chrono::NaiveDate; use futures::StreamExt; use object_store::path::Path; -use rand::distributions::DistString; +use rand::distr::SampleString; use tokio::sync::mpsc::{self, Receiver, Sender, UnboundedReceiver, UnboundedSender}; type RecordBatchReceiver = Receiver; @@ -148,8 +148,7 @@ async fn row_count_demuxer( let max_buffered_batches = exec_options.max_buffered_batches_per_output_file; let minimum_parallel_files = exec_options.minimum_parallel_output_files; let mut part_idx = 0; - let write_id = - rand::distributions::Alphanumeric.sample_string(&mut rand::thread_rng(), 16); + let write_id = rand::distr::Alphanumeric.sample_string(&mut rand::rng(), 16); let mut open_file_streams = Vec::with_capacity(minimum_parallel_files); @@ -264,8 +263,7 @@ async fn hive_style_partitions_demuxer( file_extension: String, keep_partition_by_columns: bool, ) -> Result<()> { - let write_id = - rand::distributions::Alphanumeric.sample_string(&mut rand::thread_rng(), 16); + let write_id = rand::distr::Alphanumeric.sample_string(&mut rand::rng(), 16); let exec_options = &context.session_config().options().execution; let max_buffered_recordbatches = exec_options.max_buffered_batches_per_output_file; diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index f6428a693fb17..6b5e49e7b6f75 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -597,7 +597,8 @@ mod test { use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use 
parquet::arrow::parquet_to_arrow_schema; use parquet::file::reader::{FileReader, SerializedFileReader}; - use rand::prelude::*; + use rand::rng; + use rand_distr::uniform::{UniformSampler, UniformUsize}; // We should ignore predicate that read non-primitive columns #[test] @@ -749,10 +750,11 @@ mod test { #[test] fn test_remap_projection() { - let mut rng = thread_rng(); + let mut rng = rng(); + let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap(); for _ in 0..100 { // A random selection of column indexes in arbitrary order - let projection: Vec<_> = (0..100).map(|_| rng.gen()).collect(); + let projection: Vec<_> = (0..100).map(|_| uusize.sample(&mut rng)).collect(); // File order is the projection sorted let mut file_order = projection.clone(); diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index e570ec75c691a..72456a26b6148 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -4249,7 +4249,7 @@ async fn table_with_nested_types(n: usize) -> Result { shape_id_builder.append_value(idx as u32 + 1); // Add a random number of points - let num_points: usize = rng.gen_range(0..4); + let num_points: usize = rng.random_range(0..4); if num_points > 0 { for _ in 0..num_points.max(2) { // Add x value @@ -4257,13 +4257,13 @@ async fn table_with_nested_types(n: usize) -> Result { .values() .field_builder::(0) .unwrap() - .append_value(rng.gen_range(-10..10)); + .append_value(rng.random_range(-10..10)); // Add y value points_builder .values() .field_builder::(1) .unwrap() - .append_value(rng.gen_range(-10..10)); + .append_value(rng.random_range(-10..10)); points_builder.values().append(true); } } @@ -4272,7 +4272,7 @@ async fn table_with_nested_types(n: usize) -> Result { points_builder.append(num_points > 0); // Append tags. 
- let num_tags: usize = rng.gen_range(0..5); + let num_tags: usize = rng.random_range(0..5); for id in 0..num_tags { tags_builder.values().append_value(format!("tag{}", id + 1)); } diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index bcd88bae739ae..9e647f8116a4f 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -49,7 +49,7 @@ use crate::fuzz_cases::aggregation_fuzzer::{ use datafusion_common::HashMap; use datafusion_physical_expr_common::sort_expr::LexOrdering; use rand::rngs::StdRng; -use rand::{thread_rng, Rng, SeedableRng}; +use rand::{rng, Rng, SeedableRng}; use std::str; use tokio::task::JoinSet; @@ -176,7 +176,7 @@ async fn test_median() { /// 1. Floating point numbers /// 1. structured types fn baseline_config() -> DatasetGeneratorConfig { - let mut rng = thread_rng(); + let mut rng = rng(); let columns = vec![ ColumnDescr::new("i8", DataType::Int8), ColumnDescr::new("i16", DataType::Int16), @@ -224,18 +224,18 @@ fn baseline_config() -> DatasetGeneratorConfig { // begin decimal columns ColumnDescr::new("decimal128", { // Generate valid precision and scale for Decimal128 randomly. - let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION); + let precision: u8 = rng.random_range(1..=DECIMAL128_MAX_PRECISION); // It's safe to cast `precision` to i8 type directly. - let scale: i8 = rng.gen_range( + let scale: i8 = rng.random_range( i8::MIN..=std::cmp::min(precision as i8, DECIMAL128_MAX_SCALE), ); DataType::Decimal128(precision, scale) }), ColumnDescr::new("decimal256", { // Generate valid precision and scale for Decimal256 randomly. - let precision: u8 = rng.gen_range(1..=DECIMAL256_MAX_PRECISION); + let precision: u8 = rng.random_range(1..=DECIMAL256_MAX_PRECISION); // It's safe to cast `precision` to i8 type directly. 
- let scale: i8 = rng.gen_range( + let scale: i8 = rng.random_range( i8::MIN..=std::cmp::min(precision as i8, DECIMAL256_MAX_SCALE), ); DataType::Decimal256(precision, scale) @@ -436,13 +436,13 @@ pub(crate) fn make_staggered_batches( let mut input4: Vec = vec![0; len]; input123.iter_mut().for_each(|v| { *v = ( - rng.gen_range(0..n_distinct) as i64, - rng.gen_range(0..n_distinct) as i64, - rng.gen_range(0..n_distinct) as i64, + rng.random_range(0..n_distinct) as i64, + rng.random_range(0..n_distinct) as i64, + rng.random_range(0..n_distinct) as i64, ) }); input4.iter_mut().for_each(|v| { - *v = rng.gen_range(0..n_distinct) as i64; + *v = rng.random_range(0..n_distinct) as i64; }); input123.sort(); let input1 = Int64Array::from_iter_values(input123.clone().into_iter().map(|k| k.0)); @@ -462,7 +462,7 @@ pub(crate) fn make_staggered_batches( let mut batches = vec![]; if STREAM { while remainder.num_rows() > 0 { - let batch_size = rng.gen_range(0..50); + let batch_size = rng.random_range(0..50); if remainder.num_rows() < batch_size { break; } @@ -471,7 +471,7 @@ pub(crate) fn make_staggered_batches( } } else { while remainder.num_rows() > 0 { - let batch_size = rng.gen_range(0..remainder.num_rows() + 1); + let batch_size = rng.random_range(0..remainder.num_rows() + 1); batches.push(remainder.slice(0, batch_size)); remainder = remainder.slice(batch_size, remainder.num_rows() - batch_size); } diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs index 2aeecd8ff2eae..4832376ef9ba6 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs @@ -25,7 +25,7 @@ use datafusion_catalog::TableProvider; use datafusion_common::ScalarValue; use datafusion_common::{error::Result, utils::get_available_parallelism}; use datafusion_expr::col; -use rand::{thread_rng, Rng}; +use 
rand::{rng, Rng}; use crate::fuzz_cases::aggregation_fuzzer::data_generator::Dataset; @@ -112,7 +112,7 @@ impl SessionContextGenerator { /// Randomly generate session context pub fn generate(&self) -> Result { - let mut rng = thread_rng(); + let mut rng = rng(); let schema = self.dataset.batches[0].schema(); let batches = self.dataset.batches.clone(); let provider = MemTable::try_new(schema, vec![batches])?; @@ -123,17 +123,17 @@ impl SessionContextGenerator { // - `skip_partial`, trigger or not trigger currently for simplicity // - `sorted`, if found a sorted dataset, will or will not push down this information // - `spilling`(TODO) - let batch_size = rng.gen_range(1..=self.max_batch_size); + let batch_size = rng.random_range(1..=self.max_batch_size); - let target_partitions = rng.gen_range(1..=self.max_target_partitions); + let target_partitions = rng.random_range(1..=self.max_target_partitions); let skip_partial_params_idx = - rng.gen_range(0..self.candidate_skip_partial_params.len()); + rng.random_range(0..self.candidate_skip_partial_params.len()); let skip_partial_params = self.candidate_skip_partial_params[skip_partial_params_idx]; let (provider, sort_hint) = - if rng.gen_bool(0.5) && !self.dataset.sort_keys.is_empty() { + if rng.random_bool(0.5) && !self.dataset.sort_keys.is_empty() { // Sort keys exist and random to push down let sort_exprs = self .dataset diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index e4c0cb6fe77f7..a42eed2dc278a 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -33,8 +33,9 @@ use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::sorts::sort::sort_batch; use rand::{ + rng, rngs::{StdRng, ThreadRng}, - thread_rng, 
Rng, SeedableRng, + Rng, SeedableRng, }; use test_utils::{ array_gen::{ @@ -50,7 +51,7 @@ use test_utils::{ /// - `columns`, you just need to define `column name`s and `column data type`s /// for the test datasets, and then they will be randomly generated from the generator /// when you call `generate` function -/// +/// /// - `rows_num_range`, the number of rows in the datasets will be randomly generated /// within this range /// @@ -110,7 +111,7 @@ impl DatasetGeneratorConfig { /// /// - Sort the batch according to `sort_keys` in `config` to generate another /// `len(sort_keys)` sorted batches. -/// +/// /// - Split each batch to multiple batches which each sub-batch in has the randomly `rows num`, /// and this multiple batches will be used to create the `Dataset`. /// @@ -230,9 +231,10 @@ struct RecordBatchGenerator { macro_rules! generate_string_array { ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE: ident) => {{ - let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); + let null_pct_idx = + $BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; - let max_len = $BATCH_GEN_RNG.gen_range(1..50); + let max_len = $BATCH_GEN_RNG.random_range(1..50); let mut generator = StringArrayGenerator { max_len, @@ -253,7 +255,8 @@ macro_rules! generate_string_array { macro_rules! generate_decimal_array { ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT: expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $PRECISION: ident, $SCALE: ident, $ARROW_TYPE: ident) => {{ - let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); + let null_pct_idx = + $BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; let mut generator = DecimalArrayGenerator { @@ -273,7 +276,8 @@ macro_rules! generate_decimal_array { macro_rules! 
generate_boolean_array { ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE: ident) => {{ // Select a null percentage from the candidate percentages - let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); + let null_pct_idx = + $BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; let num_distinct_booleans = if $MAX_NUM_DISTINCT >= 2 { 2 } else { 1 }; @@ -291,7 +295,8 @@ macro_rules! generate_boolean_array { macro_rules! generate_primitive_array { ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {{ - let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); + let null_pct_idx = + $BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; let mut generator = PrimitiveArrayGenerator { @@ -314,10 +319,11 @@ macro_rules! 
generate_binary_array { $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident ) => {{ - let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); + let null_pct_idx = + $BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; - let max_len = $BATCH_GEN_RNG.gen_range(1..100); + let max_len = $BATCH_GEN_RNG.random_range(1..100); let mut generator = BinaryArrayGenerator { max_len, @@ -349,9 +355,9 @@ impl RecordBatchGenerator { } fn generate(&self) -> Result { - let mut rng = thread_rng(); - let num_rows = rng.gen_range(self.min_rows_nun..=self.max_rows_num); - let array_gen_rng = StdRng::from_seed(rng.gen()); + let mut rng = rng(); + let num_rows = rng.random_range(self.min_rows_nun..=self.max_rows_num); + let array_gen_rng = StdRng::from_seed(rng.random()); // Build arrays let mut arrays = Vec::with_capacity(self.columns.len()); @@ -384,7 +390,7 @@ impl RecordBatchGenerator { array_gen_rng: StdRng, ) -> ArrayRef { let num_distinct = if num_rows > 1 { - batch_gen_rng.gen_range(1..num_rows) + batch_gen_rng.random_range(1..num_rows) } else { num_rows }; diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index d021e73f35b20..19f7e7ba27fc1 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::util::pretty::pretty_format_batches; use arrow_array::RecordBatch; use datafusion_common::{DataFusionError, Result}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use tokio::task::JoinSet; use crate::fuzz_cases::aggregation_fuzzer::{ @@ -177,7 +177,7 @@ impl AggregationFuzzer { async fn run_inner(&self) -> Result<()> { let mut join_set = JoinSet::new(); - let mut rng = thread_rng(); + let mut rng = rng(); // Loop to generate datasets and its query for _ in 0..self.data_gen_rounds { 
@@ -191,7 +191,7 @@ impl AggregationFuzzer { let query_groups = datasets .into_iter() .map(|dataset| { - let sql_idx = rng.gen_range(0..self.candidate_sqls.len()); + let sql_idx = rng.random_range(0..self.candidate_sqls.len()); let sql = self.candidate_sqls[sql_idx].clone(); QueryGroup { dataset, sql } @@ -262,10 +262,10 @@ impl AggregationFuzzer { /// It includes: /// - `expected_result`, the expected result generated by baseline [`SessionContext`] /// (disable all possible optimizations for ensuring correctness). -/// +/// /// - `ctx`, a randomly generated [`SessionContext`], `sql` will be run /// on it after, and check if the result is equal to expected. -/// +/// /// - `sql`, the selected test sql /// /// - `dataset_ref`, the input dataset, store it for error reported when found @@ -476,14 +476,14 @@ impl QueryBuilder { /// * `alias` is a unique alias `colN` for the column (to avoid duplicate column names) fn random_aggregate_functions(&self) -> Vec { const MAX_NUM_FUNCTIONS: usize = 5; - let mut rng = thread_rng(); - let num_aggregate_functions = rng.gen_range(1..MAX_NUM_FUNCTIONS); + let mut rng = rng(); + let num_aggregate_functions = rng.random_range(1..MAX_NUM_FUNCTIONS); let mut alias_gen = 1; let mut aggregate_functions = vec![]; while aggregate_functions.len() < num_aggregate_functions { - let idx = rng.gen_range(0..self.aggregate_functions.len()); + let idx = rng.random_range(0..self.aggregate_functions.len()); let (function_name, is_distinct) = &self.aggregate_functions[idx]; let argument = self.random_argument(); let alias = format!("col{}", alias_gen); @@ -497,8 +497,8 @@ impl QueryBuilder { /// Pick a random aggregate function argument fn random_argument(&self) -> String { - let mut rng = thread_rng(); - let idx = rng.gen_range(0..self.arguments.len()); + let mut rng = rng(); + let idx = rng.random_range(0..self.arguments.len()); self.arguments[idx].clone() } @@ -507,17 +507,17 @@ impl QueryBuilder { /// Limited to 3 group by columns to ensure 
coverage for large groups. With /// larger numbers of columns, each group has many fewer values. fn random_group_by(&self) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); const MAX_GROUPS: usize = 3; let max_groups = self.group_by_columns.len().max(MAX_GROUPS); - let num_group_by = rng.gen_range(1..max_groups); + let num_group_by = rng.random_range(1..max_groups); let mut already_used = HashSet::new(); let mut group_by = vec![]; while group_by.len() < num_group_by && already_used.len() != self.group_by_columns.len() { - let idx = rng.gen_range(0..self.group_by_columns.len()); + let idx = rng.random_range(0..self.group_by_columns.len()); if already_used.insert(idx) { group_by.push(self.group_by_columns[idx].clone()); } diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index 5bf42ea6889f4..c558c68354674 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -112,7 +112,7 @@ pub fn create_random_schema(seed: u64) -> Result<(SchemaRef, EquivalenceProperti }; while !remaining_exprs.is_empty() { - let n_sort_expr = rng.gen_range(0..remaining_exprs.len() + 1); + let n_sort_expr = rng.random_range(0..remaining_exprs.len() + 1); remaining_exprs.shuffle(&mut rng); let ordering = remaining_exprs @@ -367,7 +367,7 @@ pub fn generate_table_for_eq_properties( // Utility closure to generate random array let mut generate_random_array = |num_elems: usize, max_val: usize| -> ArrayRef { let values: Vec = (0..num_elems) - .map(|_| rng.gen_range(0..max_val) as f64 / 2.0) + .map(|_| rng.random_range(0..max_val) as f64 / 2.0) .collect(); Arc::new(Float64Array::from_iter_values(values)) }; @@ -522,7 +522,7 @@ fn generate_random_f64_array( rng: &mut StdRng, ) -> ArrayRef { let values: Vec = (0..n_elems) - .map(|_| rng.gen_range(0..n_distinct) as f64 / 2.0) + .map(|_| rng.random_range(0..n_distinct) as f64 / 2.0) .collect(); 
Arc::new(Float64Array::from_iter_values(values)) } diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index 41c12193f0187..2249a0f658276 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -737,13 +737,13 @@ impl JoinFuzzTestCase { /// two sorted int32 columns 'a', 'b' ranged from 0..99 as join columns /// two random int32 columns 'x', 'y' as other columns fn make_staggered_batches(len: usize) -> Vec { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut input12: Vec<(i32, i32)> = vec![(0, 0); len]; let mut input3: Vec = vec![0; len]; let mut input4: Vec = vec![0; len]; input12 .iter_mut() - .for_each(|v| *v = (rng.gen_range(0..100), rng.gen_range(0..100))); + .for_each(|v| *v = (rng.random_range(0..100), rng.random_range(0..100))); rng.fill(&mut input3[..]); rng.fill(&mut input4[..]); input12.sort_unstable(); diff --git a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs index a82849f4ea929..1365c9b915f18 100644 --- a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs @@ -25,7 +25,7 @@ use arrow_schema::SchemaRef; use datafusion::datasource::MemTable; use datafusion::prelude::SessionContext; use datafusion_common::assert_contains; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use std::sync::Arc; use test_utils::stagger_batch; @@ -55,11 +55,11 @@ async fn run_limit_fuzz_test(make_data: F) where F: Fn(usize) -> SortedData, { - let mut rng = thread_rng(); + let mut rng = rng(); for size in [10, 1_0000, 10_000, 100_000] { let data = make_data(size); // test various limits including some random ones - for limit in [1, 3, 7, 17, 10000, rng.gen_range(1..size * 2)] { + for limit in [1, 3, 7, 17, 10000, rng.random_range(1..size * 2)] { // limit can be larger than the number of rows in the input run_limit_test(limit, &data).await; } @@ 
-98,13 +98,13 @@ impl SortedData { /// Create an i32 column of random values, with the specified number of /// rows, sorted the default fn new_i32(size: usize) -> Self { - let mut rng = thread_rng(); + let mut rng = rng(); // have some repeats (approximately 1/3 of the values are the same) let max = size as i32 / 3; let data: Vec> = (0..size) .map(|_| { // no nulls for now - Some(rng.gen_range(0..max)) + Some(rng.random_range(0..max)) }) .collect(); @@ -119,17 +119,17 @@ impl SortedData { /// Create an f64 column of random values, with the specified number of /// rows, sorted the default fn new_f64(size: usize) -> Self { - let mut rng = thread_rng(); + let mut rng = rng(); let mut data: Vec> = (0..size / 3) .map(|_| { // no nulls for now - Some(rng.gen_range(0.0..1.0f64)) + Some(rng.random_range(0.0..1.0f64)) }) .collect(); // have some repeats (approximately 1/3 of the values are the same) while data.len() < size { - data.push(data[rng.gen_range(0..data.len())]); + data.push(data[rng.random_range(0..data.len())]); } let batches = stagger_batch(f64_batch(data.iter().cloned())); @@ -143,7 +143,7 @@ impl SortedData { /// Create an string column of random values, with the specified number of /// rows, sorted the default fn new_str(size: usize) -> Self { - let mut rng = thread_rng(); + let mut rng = rng(); let mut data: Vec> = (0..size / 3) .map(|_| { // no nulls for now @@ -153,7 +153,7 @@ impl SortedData { // have some repeats (approximately 1/3 of the values are the same) while data.len() < size { - data.push(data[rng.gen_range(0..data.len())].clone()); + data.push(data[rng.random_range(0..data.len())].clone()); } let batches = stagger_batch(string_batch(data.iter())); @@ -167,7 +167,7 @@ impl SortedData { /// Create two columns of random values (int64, string), with the specified number of /// rows, sorted the default fn new_i64str(size: usize) -> Self { - let mut rng = thread_rng(); + let mut rng = rng(); // 100 distinct values let strings: Vec> = (0..100) @@ 
-181,8 +181,8 @@ impl SortedData { let data = (0..size) .map(|_| { ( - Some(rng.gen_range(0..10)), - strings[rng.gen_range(0..strings.len())].clone(), + Some(rng.random_range(0..10)), + strings[rng.random_range(0..strings.len())].clone(), ) }) .collect::>(); @@ -341,8 +341,8 @@ async fn run_limit_test(fetch: usize, data: &SortedData) { /// Return random ASCII String with len fn get_random_string(len: usize) -> String { - thread_rng() - .sample_iter(rand::distributions::Alphanumeric) + rng() + .sample_iter(rand::distr::Alphanumeric) .take(len) .map(char::from) .collect() diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs index 8ce980ee080b8..fc233fc604a1f 100644 --- a/datafusion/core/tests/fuzz_cases/pruning.rs +++ b/datafusion/core/tests/fuzz_cases/pruning.rs @@ -191,7 +191,7 @@ impl Utf8Test { /// all combinations of interesting charactes with lengths ranging from 1 to 4 fn values() -> &'static [String] { VALUES.get_or_init(|| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let characters = [ "z", @@ -254,7 +254,7 @@ impl Utf8Test { return (*files).clone(); } - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let values = Self::values(); let mut row_groups = vec![]; diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index 19ffa69f11d36..42ff85079908d 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -178,20 +178,20 @@ impl SortTest { /// Return randomly sized record batches in a field named 'x' of type `Int32` /// with randomized i32 content fn make_staggered_i32_batches(len: usize) -> Vec { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let max_batch = 1024; let mut batches = vec![]; let mut remaining = len; while remaining != 0 { - let to_read = rng.gen_range(0..=remaining.min(max_batch)); + let to_read = 
rng.random_range(0..=remaining.min(max_batch)); remaining -= to_read; batches.push( RecordBatch::try_from_iter(vec![( "x", Arc::new(Int32Array::from_iter_values( - (0..to_read).map(|_| rng.gen()), + (0..to_read).map(|_| rng.random()), )) as ArrayRef, )]) .unwrap(), diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index daa282c8fe4a9..c470b26fcbe78 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -93,7 +93,7 @@ mod sp_repartition_fuzz_tests { }; while !remaining_exprs.is_empty() { - let n_sort_expr = rng.gen_range(0..remaining_exprs.len() + 1); + let n_sort_expr = rng.random_range(0..remaining_exprs.len() + 1); remaining_exprs.shuffle(&mut rng); let ordering = remaining_exprs @@ -143,7 +143,7 @@ mod sp_repartition_fuzz_tests { // Utility closure to generate random array let mut generate_random_array = |num_elems: usize, max_val: usize| -> ArrayRef { let values: Vec = (0..num_elems) - .map(|_| rng.gen_range(0..max_val) as u64) + .map(|_| rng.random_range(0..max_val) as u64) .collect(); Arc::new(UInt64Array::from_iter_values(values)) }; @@ -445,9 +445,9 @@ mod sp_repartition_fuzz_tests { let mut input123: Vec<(i64, i64, i64)> = vec![(0, 0, 0); len]; input123.iter_mut().for_each(|v| { *v = ( - rng.gen_range(0..n_distinct) as i64, - rng.gen_range(0..n_distinct) as i64, - rng.gen_range(0..n_distinct) as i64, + rng.random_range(0..n_distinct) as i64, + rng.random_range(0..n_distinct) as i64, + rng.random_range(0..n_distinct) as i64, ) }); input123.sort(); @@ -469,7 +469,7 @@ mod sp_repartition_fuzz_tests { let mut batches = vec![]; if STREAM { while remainder.num_rows() > 0 { - let batch_size = rng.gen_range(0..50); + let batch_size = rng.random_range(0..50); if remainder.num_rows() < batch_size { break; } @@ -479,7 +479,7 @@ mod sp_repartition_fuzz_tests { } } 
else { while remainder.num_rows() > 0 { - let batch_size = rng.gen_range(0..remainder.num_rows() + 1); + let batch_size = rng.random_range(0..remainder.num_rows() + 1); batches.push(remainder.slice(0, batch_size)); remainder = remainder.slice(batch_size, remainder.num_rows() - batch_size); diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 979aa5a2da035..dd439683f9e3b 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -51,7 +51,7 @@ use datafusion_functions_window::nth_value::{ }; use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -400,8 +400,8 @@ fn get_random_function( WindowFunctionDefinition::WindowUDF(lead_udwf()), vec![ arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), - lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), + lit(ScalarValue::Int64(Some(rng.random_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.random_range(1..1000)))), ], ), ); @@ -411,8 +411,8 @@ fn get_random_function( WindowFunctionDefinition::WindowUDF(lag_udwf()), vec![ arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), - lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), + lit(ScalarValue::Int64(Some(rng.random_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.random_range(1..1000)))), ], ), ); @@ -437,12 +437,12 @@ fn get_random_function( WindowFunctionDefinition::WindowUDF(nth_value_udwf()), vec![ arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.random_range(1..10)))), ], ), ); - let rand_fn_idx = rng.gen_range(0..window_fn_map.len()); + let rand_fn_idx = rng.random_range(0..window_fn_map.len()); let fn_name = 
window_fn_map.keys().collect::>()[rand_fn_idx]; let (window_fn, args) = window_fn_map.values().collect::>()[rand_fn_idx]; let mut args = args.clone(); @@ -465,12 +465,12 @@ fn get_random_window_frame(rng: &mut StdRng, is_linear: bool) -> WindowFrame { is_preceding: bool, } let first_bound = Utils { - val: rng.gen_range(0..10), - is_preceding: rng.gen_range(0..2) == 0, + val: rng.random_range(0..10), + is_preceding: rng.random_range(0..2) == 0, }; let second_bound = Utils { - val: rng.gen_range(0..10), - is_preceding: rng.gen_range(0..2) == 0, + val: rng.random_range(0..10), + is_preceding: rng.random_range(0..2) == 0, }; let (start_bound, end_bound) = if first_bound.is_preceding == second_bound.is_preceding { @@ -487,7 +487,7 @@ fn get_random_window_frame(rng: &mut StdRng, is_linear: bool) -> WindowFrame { (second_bound, first_bound) }; // 0 means Range, 1 means Rows, 2 means GROUPS - let rand_num = rng.gen_range(0..3); + let rand_num = rng.random_range(0..3); let units = if rand_num < 1 { WindowFrameUnits::Range } else if rand_num < 2 { @@ -519,7 +519,7 @@ fn get_random_window_frame(rng: &mut StdRng, is_linear: bool) -> WindowFrame { }; let mut window_frame = WindowFrame::new_bounds(units, start_bound, end_bound); // with 10% use unbounded preceding in tests - if rng.gen_range(0..10) == 0 { + if rng.random_range(0..10) == 0 { window_frame.start_bound = WindowFrameBound::Preceding(ScalarValue::Int32(None)); } @@ -547,7 +547,7 @@ fn get_random_window_frame(rng: &mut StdRng, is_linear: bool) -> WindowFrame { }; let mut window_frame = WindowFrame::new_bounds(units, start_bound, end_bound); // with 10% use unbounded preceding in tests - if rng.gen_range(0..10) == 0 { + if rng.random_range(0..10) == 0 { window_frame.start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); } @@ -571,7 +571,7 @@ fn convert_bound_to_current_row_if_applicable( match bound { WindowFrameBound::Preceding(value) | WindowFrameBound::Following(value) => { if let Ok(zero) = 
ScalarValue::new_zero(&value.data_type()) { - if value == &zero && rng.gen_range(0..2) == 0 { + if value == &zero && rng.random_range(0..2) == 0 { *bound = WindowFrameBound::CurrentRow; } } @@ -760,9 +760,9 @@ pub(crate) fn make_staggered_batches( let mut input5: Vec = vec!["".to_string(); len]; input123.iter_mut().for_each(|v| { *v = ( - rng.gen_range(0..n_distinct) as i32, - rng.gen_range(0..n_distinct) as i32, - rng.gen_range(0..n_distinct) as i32, + rng.random_range(0..n_distinct) as i32, + rng.random_range(0..n_distinct) as i32, + rng.random_range(0..n_distinct) as i32, ) }); input123.sort(); @@ -790,7 +790,7 @@ pub(crate) fn make_staggered_batches( let mut batches = vec![]; if STREAM { while remainder.num_rows() > 0 { - let batch_size = rng.gen_range(0..50); + let batch_size = rng.random_range(0..50); if remainder.num_rows() < batch_size { batches.push(remainder); break; @@ -800,7 +800,7 @@ pub(crate) fn make_staggered_batches( } } else { while remainder.num_rows() > 0 { - let batch_size = rng.gen_range(0..remainder.num_rows() + 1); + let batch_size = rng.random_range(0..remainder.num_rows() + 1); batches.push(remainder.slice(0, batch_size)); remainder = remainder.slice(batch_size, remainder.num_rows() - batch_size); } diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index bb86868a82146..7393156aed772 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -44,7 +44,7 @@ futures = { workspace = true } log = { workspace = true } object_store = { workspace = true } parking_lot = { workspace = true } -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } tempfile = { workspace = true } url = { workspace = true } diff --git a/datafusion/execution/src/disk_manager.rs b/datafusion/execution/src/disk_manager.rs index 756da7ed5b468..0b3dadc9d5d68 100644 --- a/datafusion/execution/src/disk_manager.rs +++ b/datafusion/execution/src/disk_manager.rs @@ -20,7 +20,7 @@ use 
datafusion_common::{resources_datafusion_err, DataFusionError, Result}; use log::debug; use parking_lot::Mutex; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use std::path::{Path, PathBuf}; use std::sync::Arc; use tempfile::{Builder, NamedTempFile, TempDir}; @@ -136,7 +136,7 @@ impl DiskManager { local_dirs.push(Arc::new(tempdir)); } - let dir_index = thread_rng().gen_range(0..local_dirs.len()); + let dir_index = rng().random_range(0..local_dirs.len()); Ok(RefCountedTempFile { _parent_temp_dir: Arc::clone(&local_dirs[dir_index]), tempfile: Builder::new() diff --git a/datafusion/functions-aggregate-common/Cargo.toml b/datafusion/functions-aggregate-common/Cargo.toml index cf6eb99e60c62..f3f76c20e3797 100644 --- a/datafusion/functions-aggregate-common/Cargo.toml +++ b/datafusion/functions-aggregate-common/Cargo.toml @@ -45,7 +45,7 @@ datafusion-physical-expr-common = { workspace = true } [dev-dependencies] criterion = "0.5" -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } [[bench]] harness = false diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs index e629e99e1657a..987ba57f7719e 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs @@ -636,7 +636,7 @@ mod test { #[test] fn accumulate_fuzz() { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); for _ in 0..100 { Fixture::new_random(&mut rng).run(); } @@ -661,23 +661,23 @@ mod test { impl Fixture { fn new_random(rng: &mut ThreadRng) -> Self { // Number of input values in a batch - let num_values: usize = rng.gen_range(1..200); + let num_values: usize = rng.random_range(1..200); // number of distinct groups - let num_groups: usize = rng.gen_range(2..1000); + let num_groups: usize = 
rng.random_range(2..1000); let max_group = num_groups - 1; let group_indices: Vec = (0..num_values) - .map(|_| rng.gen_range(0..max_group)) + .map(|_| rng.random_range(0..max_group)) .collect(); - let values: Vec = (0..num_values).map(|_| rng.gen()).collect(); + let values: Vec = (0..num_values).map(|_| rng.random()).collect(); // 10% chance of false // 10% change of null // 80% chance of true let filter: BooleanArray = (0..num_values) .map(|_| { - let filter_value = rng.gen_range(0.0..1.0); + let filter_value = rng.random_range(0.0..1.0); if filter_value < 0.1 { Some(false) } else if filter_value < 0.2 { @@ -690,14 +690,14 @@ mod test { // random values with random number and location of nulls // random null percentage - let null_pct: f32 = rng.gen_range(0.0..1.0); + let null_pct: f32 = rng.random_range(0.0..1.0); let values_with_nulls: Vec> = (0..num_values) .map(|_| { - let is_null = null_pct < rng.gen_range(0.0..1.0); + let is_null = null_pct < rng.random_range(0.0..1.0); if is_null { None } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect(); diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 333f0d9cdd79f..0b292c42c43c3 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -55,7 +55,6 @@ log = { workspace = true } paste = "1.0.14" [dev-dependencies] -arrow = { workspace = true, features = ["test_utils"] } criterion = "0.5" rand = { workspace = true } diff --git a/datafusion/functions-aggregate/benches/array_agg.rs b/datafusion/functions-aggregate/benches/array_agg.rs index c4599cdfc9b3d..6dc0c5ab2a39c 100644 --- a/datafusion/functions-aggregate/benches/array_agg.rs +++ b/datafusion/functions-aggregate/benches/array_agg.rs @@ -17,18 +17,17 @@ use std::sync::Arc; -use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray}; +use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, PrimitiveArray}; use 
arrow::datatypes::Int64Type; -use arrow::util::bench_util::create_primitive_array; use arrow_schema::Field; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::Accumulator; use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator; -use arrow::util::test_util::seedable_rng; use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; -use rand::distributions::{Distribution, Standard}; -use rand::Rng; +use rand::distr::{Distribution, StandardUniform}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; fn merge_batch_bench(c: &mut Criterion, name: &str, values: ArrayRef) { let list_item_data_type = values.as_list::().values().data_type().clone(); @@ -54,20 +53,20 @@ pub fn create_list_array( ) -> ListArray where T: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { let mut nulls_builder = NullBufferBuilder::new(size); - let mut rng = seedable_rng(); + let mut rng = StdRng::seed_from_u64(42); let offsets = OffsetBuffer::from_lengths((0..size).map(|_| { - let is_null = rng.gen::() < null_density; + let is_null = rng.random::() < null_density; - let mut length = rng.gen_range(1..10); + let mut length = rng.random_range(1..10); if is_null { nulls_builder.append_null(); - if rng.gen::() <= zero_length_lists_probability { + if rng.random::() <= zero_length_lists_probability { length = 0; } } else { @@ -79,7 +78,15 @@ where let length = *offsets.last().unwrap() as usize; - let values = create_primitive_array::(length, 0.0); + let values = (0..length) + .map(|_| { + if rng.random::() < 0.0 { + None + } else { + Some(rng.random()) + } + }) + .collect::>(); let field = Field::new_list_field(T::DATA_TYPE, true); diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index da5ec739ad8d1..6d0dbd58fa1d2 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -1714,10 +1714,10 @@ mod tests { use 
rand::Rng; fn get_random_vec_i32(len: usize) -> Vec { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut input = Vec::with_capacity(len); for _i in 0..len { - input.push(rng.gen_range(0..100)); + input.push(rng.random_range(0..100)); } input } diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 7ec8f5267b185..545226be2c2f0 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -59,7 +59,7 @@ paste = "1.0.14" [dev-dependencies] criterion = { version = "0.5", features = ["async_tokio"] } -rand = "0.9.0" +rand = { workspace = true, features = ["thread_rng"] } [[bench]] harness = false diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index f92bb6cecf9c5..c90795f151436 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -36,7 +36,7 @@ fn keys(rng: &mut ThreadRng) -> Vec { let mut keys = HashSet::with_capacity(1000); while keys.len() < 1000 { - keys.insert(rng.gen_range(0..10000).to_string()); + keys.insert(rng.random_range(0..10000).to_string()); } keys.into_iter().collect() @@ -46,14 +46,14 @@ fn values(rng: &mut ThreadRng) -> Vec { let mut values = HashSet::with_capacity(1000); while values.len() < 1000 { - values.insert(rng.gen_range(0..10000)); + values.insert(rng.random_range(0..10000)); } values.into_iter().collect() } fn criterion_benchmark(c: &mut Criterion) { c.bench_function("make_map_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let keys = keys(&mut rng); let values = values(&mut rng); let mut buffer = Vec::new(); @@ -74,7 +74,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("map_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let field = Arc::new(Field::new_list_field(DataType::Utf8, true)); let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 1000])); 
let key_list = ListArray::new( diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index db3e6838f6a53..be76de513a77e 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -91,7 +91,7 @@ uuid = { version = "1.7", features = ["v4"], optional = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } criterion = "0.5" -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } tokio = { workspace = true, features = ["macros", "rt", "sync"] } [[bench]] diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index aa7c7710617d1..542ee7edd8578 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -31,7 +31,7 @@ use datafusion_functions::datetime::date_bin; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { let mut seconds = vec![]; for _ in 0..1000 { - seconds.push(rng.gen_range(0..1_000_000)); + seconds.push(rng.random_range(0..1_000_000)); } TimestampSecondArray::from(seconds) @@ -39,7 +39,7 @@ fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("date_bin_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let timestamps_array = Arc::new(timestamps(&mut rng)) as ArrayRef; let batch_len = timestamps_array.len(); let interval = ColumnarValue::Scalar(ScalarValue::new_interval_dt(0, 1_000_000)); diff --git a/datafusion/functions/benches/find_in_set.rs b/datafusion/functions/benches/find_in_set.rs index 9307525482c2b..6459102a9b4b5 100644 --- a/datafusion/functions/benches/find_in_set.rs +++ b/datafusion/functions/benches/find_in_set.rs @@ -25,7 +25,7 @@ use arrow::util::bench_util::{ use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; -use 
rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use std::sync::Arc; @@ -51,7 +51,7 @@ fn gen_args_array( let mut output_set_vec: Vec> = Vec::with_capacity(n_rows); let mut output_element_vec: Vec> = Vec::with_capacity(n_rows); for _ in 0..n_rows { - let rand_num = rng_ref.gen::(); // [0.0, 1.0) + let rand_num = rng_ref.random::(); // [0.0, 1.0) if rand_num < null_density { output_element_vec.push(None); output_set_vec.push(None); @@ -60,7 +60,7 @@ fn gen_args_array( let mut generated_string = String::with_capacity(str_len_chars); for i in 0..num_elements { for _ in 0..str_len_chars { - let idx = rng_ref.gen_range(0..corpus_char_count); + let idx = rng_ref.random_range(0..corpus_char_count); let char = utf8.chars().nth(idx).unwrap(); generated_string.push(char); } @@ -112,7 +112,7 @@ fn random_element_in_set(string: &str) -> String { } let mut rng = StdRng::seed_from_u64(44); - let random_index = rng.gen_range(0..elements.len()); + let random_index = rng.random_range(0..elements.len()); elements[random_index].to_string() } diff --git a/datafusion/functions/benches/helper.rs b/datafusion/functions/benches/helper.rs index c7c405bc46969..6bfba55346b8e 100644 --- a/datafusion/functions/benches/helper.rs +++ b/datafusion/functions/benches/helper.rs @@ -17,7 +17,7 @@ use arrow::array::{StringArray, StringViewArray}; use datafusion_expr::ColumnarValue; -use rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::{rngs::StdRng, Rng, SeedableRng}; use std::sync::Arc; @@ -39,14 +39,14 @@ pub fn gen_string_array( let mut output_string_vec: Vec> = Vec::with_capacity(n_rows); for _ in 0..n_rows { - let rand_num = rng_ref.gen::(); // [0.0, 1.0) + let rand_num = rng_ref.random::(); // [0.0, 1.0) if rand_num < null_density { output_string_vec.push(None); } else if rand_num < null_density + utf8_density { // Generate random UTF8 string let mut generated_string = 
String::with_capacity(str_len_chars); for _ in 0..str_len_chars { - let idx = rng_ref.gen_range(0..corpus_char_count); + let idx = rng_ref.random_range(0..corpus_char_count); let char = corpus.chars().nth(idx).unwrap(); generated_string.push(char); } diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index fed455eeac91e..849ff2cedafe6 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -25,7 +25,7 @@ use criterion::{ use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDF}; use datafusion_functions::string; -use rand::{distributions::Alphanumeric, rngs::StdRng, Rng, SeedableRng}; +use rand::{distr::Alphanumeric, rngs::StdRng, Rng, SeedableRng}; use std::{fmt, sync::Arc}; pub fn seedable_rng() -> StdRng { @@ -64,7 +64,7 @@ pub fn create_string_array_and_characters( // - Other 90% will be strings with same `remaining_len` lengths // We will build the string array on it later. let string_iter = (0..size).map(|_| { - if rng.gen::() < 0.1 { + if rng.random::() < 0.1 { None } else { let mut value = trimmed.as_bytes().to_vec(); diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index d9309bcd3db23..7b5f33a8e9408 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -31,7 +31,7 @@ use datafusion_functions::datetime::make_date; fn years(rng: &mut ThreadRng) -> Int32Array { let mut years = vec![]; for _ in 0..1000 { - years.push(rng.gen_range(1900..2050)); + years.push(rng.random_range(1900..2050)); } Int32Array::from(years) @@ -40,7 +40,7 @@ fn years(rng: &mut ThreadRng) -> Int32Array { fn months(rng: &mut ThreadRng) -> Int32Array { let mut months = vec![]; for _ in 0..1000 { - months.push(rng.gen_range(1..13)); + months.push(rng.random_range(1..13)); } Int32Array::from(months) @@ -49,14 +49,14 @@ fn months(rng: &mut ThreadRng) -> Int32Array { fn days(rng: &mut 
ThreadRng) -> Int32Array { let mut days = vec![]; for _ in 0..1000 { - days.push(rng.gen_range(1..29)); + days.push(rng.random_range(1..29)); } Int32Array::from(days) } fn criterion_benchmark(c: &mut Criterion) { c.bench_function("make_date_col_col_col_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let years_array = Arc::new(years(&mut rng)) as ArrayRef; let batch_len = years_array.len(); let years = ColumnarValue::Array(years_array); @@ -77,7 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("make_date_scalar_col_col_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); let months_arr = Arc::new(months(&mut rng)) as ArrayRef; let batch_len = months_arr.len(); @@ -98,7 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("make_date_scalar_scalar_col_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); let month = ColumnarValue::Scalar(ScalarValue::Int32(Some(11))); let day_arr = Arc::new(days(&mut rng)); diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 6f267b350a35f..109baedd8a663 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -23,7 +23,7 @@ use arrow::util::bench_util::{ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use datafusion_expr::ColumnarValue; use datafusion_functions::unicode::{lpad, rpad}; -use rand::distributions::{Distribution, Uniform}; +use rand::distr::{Distribution, Uniform}; use rand::Rng; use std::sync::Arc; @@ -49,13 +49,13 @@ where T: ArrowPrimitiveType, { let dist = Filter { - dist: Uniform::new_inclusive::(0, len as i64), + dist: Uniform::new_inclusive::(0, len as i64).unwrap(), }; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); (0..size) .map(|_| { - if 
rng.gen::() < null_density { + if rng.random::() < null_density { None } else { Some(rng.sample(&dist)) diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index 468d3d548bcf0..0827ea85b5110 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -26,9 +26,9 @@ use datafusion_functions::regex::regexpcount::regexp_count_func; use datafusion_functions::regex::regexplike::regexp_like; use datafusion_functions::regex::regexpmatch::regexp_match; use datafusion_functions::regex::regexpreplace::regexp_replace; -use rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::rngs::ThreadRng; -use rand::seq::SliceRandom; +use rand::seq::IndexedRandom; use rand::Rng; use std::iter; use std::sync::Arc; @@ -65,7 +65,7 @@ fn regex(rng: &mut ThreadRng) -> StringArray { fn start(rng: &mut ThreadRng) -> Int64Array { let mut data: Vec = vec![]; for _ in 0..1000 { - data.push(rng.gen_range(1..5)); + data.push(rng.random_range(1..5)); } Int64Array::from(data) @@ -88,7 +88,7 @@ fn flags(rng: &mut ThreadRng) -> StringArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("regexp_count_1000 string", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data = Arc::new(data(&mut rng)) as ArrayRef; let regex = Arc::new(regex(&mut rng)) as ArrayRef; let start = Arc::new(start(&mut rng)) as ArrayRef; @@ -108,7 +108,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("regexp_count_1000 utf8view", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); let regex = cast(&regex(&mut rng), &DataType::Utf8View).unwrap(); let start = Arc::new(start(&mut rng)) as ArrayRef; @@ -128,7 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("regexp_like_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data = Arc::new(data(&mut
rng)) as ArrayRef; let regex = Arc::new(regex(&mut rng)) as ArrayRef; let flags = Arc::new(flags(&mut rng)) as ArrayRef; @@ -142,7 +142,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("regexp_match_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data = Arc::new(data(&mut rng)) as ArrayRef; let regex = Arc::new(regex(&mut rng)) as ArrayRef; let flags = Arc::new(flags(&mut rng)) as ArrayRef; @@ -160,7 +160,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("regexp_replace_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data = Arc::new(data(&mut rng)) as ArrayRef; let regex = Arc::new(regex(&mut rng)) as ArrayRef; let flags = Arc::new(flags(&mut rng)) as ArrayRef; diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index f4962380dfbf1..0f6406061eade 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -20,7 +20,7 @@ extern crate criterion; use arrow::array::{StringArray, StringViewArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::ColumnarValue; -use rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use std::str::Chars; @@ -45,7 +45,7 @@ fn gen_string_array( let mut output_string_vec: Vec> = Vec::with_capacity(n_rows); let mut output_sub_string_vec: Vec> = Vec::with_capacity(n_rows); for _ in 0..n_rows { - let rand_num = rng_ref.gen::(); // [0.0, 1.0) + let rand_num = rng_ref.random::(); // [0.0, 1.0) if rand_num < null_density { output_sub_string_vec.push(None); output_string_vec.push(None); @@ -53,7 +53,7 @@ fn gen_string_array( // Generate random UTF8 string let mut generated_string = String::with_capacity(str_len_chars); for _ in 0..str_len_chars { - let idx = rng_ref.gen_range(0..corpus_char_count); + let idx = 
rng_ref.random_range(0..corpus_char_count); let char = utf8.chars().nth(idx).unwrap(); generated_string.push(char); } @@ -93,8 +93,8 @@ fn random_substring(chars: Chars) -> String { // get the substring of a random length from the input string by byte unit let mut rng = StdRng::seed_from_u64(44); let count = chars.clone().count(); - let start = rng.gen_range(0..count - 1); - let end = rng.gen_range(start + 1..count); + let start = rng.random_range(0..count - 1); + let end = rng.random_range(start + 1..count); chars .enumerate() .filter(|(i, _)| *i >= start && *i < end) diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index 1ea8e2606f0d7..021436db6b83c 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int64Array, StringArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use rand::distributions::{Alphanumeric, Uniform}; +use rand::distr::{Alphanumeric, Uniform}; use rand::prelude::Distribution; use rand::Rng; @@ -50,22 +50,22 @@ where fn data() -> (StringArray, StringArray, Int64Array) { let dist = Filter { - dist: Uniform::new(-4, 5), + dist: Uniform::new(-4, 5).unwrap(), test: |x: &i64| x != &0, }; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut strings: Vec = vec![]; let mut delimiters: Vec = vec![]; let mut counts: Vec = vec![]; for _ in 0..1000 { - let length = rng.gen_range(20..50); + let length = rng.random_range(20..50); let text: String = (&mut rng) .sample_iter(&Alphanumeric) .take(length) .map(char::from) .collect(); - let char = rng.gen_range(0..text.len()); + let char = rng.random_range(0..text.len()); let delimiter = &text.chars().nth(char).unwrap(); let count = rng.sample(&dist); diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index 72eae45b1e1b9..0039c5b7dd68a 100644 --- 
a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -24,7 +24,7 @@ use chrono::prelude::*; use chrono::TimeDelta; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::rngs::ThreadRng; -use rand::seq::SliceRandom; +use rand::seq::IndexedRandom; use rand::Rng; use datafusion_common::ScalarValue; @@ -38,7 +38,7 @@ fn random_date_in_range( end_date: NaiveDate, ) -> NaiveDate { let days_in_range = (end_date - start_date).num_days(); - let random_days: i64 = rng.gen_range(0..days_in_range); + let random_days: i64 = rng.random_range(0..days_in_range); start_date + TimeDelta::try_days(random_days).unwrap() } @@ -81,7 +81,7 @@ fn patterns(rng: &mut ThreadRng) -> StringArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_char_array_array_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data_arr = data(&mut rng); let batch_len = data_arr.len(); let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); @@ -98,7 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); c.bench_function("to_char_array_scalar_1000", |b| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let data_arr = data(&mut rng); let batch_len = data_arr.len(); let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); diff --git a/datafusion/functions/src/math/random.rs b/datafusion/functions/src/math/random.rs index 197d065ea408f..542554f765f9b 100644 --- a/datafusion/functions/src/math/random.rs +++ b/datafusion/functions/src/math/random.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::array::Float64Array; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Float64; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; @@ -78,7 +78,7 @@ impl ScalarUDFImpl for RandomFunc { if !args.is_empty() { return internal_err!("{} function does not accept arguments", 
self.name()); } - let mut rng = thread_rng(); + let mut rng = rng(); let mut values = vec![0.0; num_rows]; // Equivalent to set each element with rng.gen_range(0.0..1.0), but more efficient rng.fill(&mut values[..]); diff --git a/datafusion/physical-expr/benches/in_list.rs b/datafusion/physical-expr/benches/in_list.rs index 90bfc5efb61e8..e91e8d1f137c1 100644 --- a/datafusion/physical-expr/benches/in_list.rs +++ b/datafusion/physical-expr/benches/in_list.rs @@ -21,7 +21,7 @@ use arrow::record_batch::RecordBatch; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use datafusion_physical_expr::expressions::{col, in_list, lit}; -use rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::prelude::*; use std::sync::Arc; @@ -51,7 +51,7 @@ fn do_benches( for string_length in [5, 10, 20] { let values: StringArray = (0..array_length) .map(|_| { - rng.gen_bool(null_percent) + rng.random_bool(null_percent) .then(|| random_string(&mut rng, string_length)) }) .collect(); @@ -71,11 +71,11 @@ fn do_benches( } let values: Float32Array = (0..array_length) - .map(|_| rng.gen_bool(null_percent).then(|| rng.gen())) + .map(|_| rng.random_bool(null_percent).then(|| rng.random())) .collect(); let in_list: Vec<_> = (0..in_list_length) - .map(|_| ScalarValue::Float32(Some(rng.gen()))) + .map(|_| ScalarValue::Float32(Some(rng.random()))) .collect(); do_bench( @@ -86,11 +86,11 @@ fn do_benches( ); let values: Int32Array = (0..array_length) - .map(|_| rng.gen_bool(null_percent).then(|| rng.gen())) + .map(|_| rng.random_bool(null_percent).then(|| rng.random())) .collect(); let in_list: Vec<_> = (0..in_list_length) - .map(|_| ScalarValue::Int32(Some(rng.gen()))) + .map(|_| ScalarValue::Int32(Some(rng.random()))) .collect(); do_bench( diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index f5a83c58deecb..331c8a3dac41c 100644 --- 
a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -816,8 +816,8 @@ mod tests { let mut r = StdRng::seed_from_u64(seed); let (left_given, right_given, left_expected, right_expected) = if ASC { - let left = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); - let right = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); + let left = r.random_range((0 as $TYPE)..(1000 as $TYPE)); + let right = r.random_range((0 as $TYPE)..(1000 as $TYPE)); ( (Some(left), None), (Some(right), None), @@ -825,8 +825,8 @@ mod tests { (Some(<$TYPE>::max(right, left + expr_right)), None), ) } else { - let left = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); - let right = r.gen_range((0 as $TYPE)..(1000 as $TYPE)); + let left = r.random_range((0 as $TYPE)..(1000 as $TYPE)); + let right = r.random_range((0 as $TYPE)..(1000 as $TYPE)); ( (None, Some(left)), (None, Some(right)), diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 37d6c0aff8503..6c1fd2dea0e1b 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -196,7 +196,7 @@ struct AscendingRandomFloatIterator { impl AscendingRandomFloatIterator { fn new(min: f64, max: f64) -> Self { let mut rng = StdRng::seed_from_u64(42); - let initial = rng.gen_range(min..max); + let initial = rng.random_range(min..max); AscendingRandomFloatIterator { prev: initial, max, @@ -209,7 +209,7 @@ impl Iterator for AscendingRandomFloatIterator { type Item = f64; fn next(&mut self) -> Option { - let value = self.rng.gen_range(self.prev..self.max); + let value = self.rng.random_range(self.prev..self.max); self.prev = value; Some(value) } diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 4ad6e213cda3d..107048d31b701 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -30,4 +30,4 @@ arrow = { workspace = true } chrono-tz = { version = "0.10.0", 
default-features = false } datafusion-common = { workspace = true, default-features = true } env_logger = { workspace = true } -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } diff --git a/test-utils/src/array_gen/binary.rs b/test-utils/src/array_gen/binary.rs index d342118fa85d3..9740eeae5e7fe 100644 --- a/test-utils/src/array_gen/binary.rs +++ b/test-utils/src/array_gen/binary.rs @@ -46,11 +46,11 @@ impl BinaryArrayGenerator { // Pick num_binaries randomly from the distinct binary table let indices: UInt32Array = (0..self.num_binaries) .map(|_| { - if self.rng.gen::() < self.null_pct { + if self.rng.random::() < self.null_pct { None } else if self.num_distinct_binaries > 1 { let range = 0..(self.num_distinct_binaries as u32); - Some(self.rng.gen_range(range)) + Some(self.rng.random_range(range)) } else { Some(0) } @@ -68,11 +68,11 @@ impl BinaryArrayGenerator { let indices: UInt32Array = (0..self.num_binaries) .map(|_| { - if self.rng.gen::() < self.null_pct { + if self.rng.random::() < self.null_pct { None } else if self.num_distinct_binaries > 1 { let range = 0..(self.num_distinct_binaries as u32); - Some(self.rng.gen_range(range)) + Some(self.rng.random_range(range)) } else { Some(0) } @@ -88,7 +88,7 @@ fn random_binary(rng: &mut StdRng, max_len: usize) -> Vec { if max_len == 0 { Vec::new() } else { - let len = rng.gen_range(1..=max_len); - (0..len).map(|_| rng.gen()).collect() + let len = rng.random_range(1..=max_len); + (0..len).map(|_| rng.random()).collect() } } diff --git a/test-utils/src/array_gen/boolean.rs b/test-utils/src/array_gen/boolean.rs index f3b83dd245f72..004d615b4caa4 100644 --- a/test-utils/src/array_gen/boolean.rs +++ b/test-utils/src/array_gen/boolean.rs @@ -34,7 +34,7 @@ impl BooleanArrayGenerator { // Table of booleans from which to draw (distinct means 1 or 2) let distinct_booleans: BooleanArray = match self.num_distinct_booleans { 1 => { - let value = self.rng.gen::(); + let value = 
self.rng.random::(); let mut builder = BooleanBuilder::with_capacity(1); builder.append_value(value); builder.finish() @@ -51,10 +51,10 @@ impl BooleanArrayGenerator { // Generate indices to select from the distinct booleans let indices: UInt32Array = (0..self.num_booleans) .map(|_| { - if self.rng.gen::() < self.null_pct { + if self.rng.random::() < self.null_pct { None } else if self.num_distinct_booleans > 1 { - Some(self.rng.gen_range(0..self.num_distinct_booleans as u32)) + Some(self.rng.random_range(0..self.num_distinct_booleans as u32)) } else { Some(0) } diff --git a/test-utils/src/array_gen/decimal.rs b/test-utils/src/array_gen/decimal.rs index d46ea9fe54575..c5ec8ac5e8938 100644 --- a/test-utils/src/array_gen/decimal.rs +++ b/test-utils/src/array_gen/decimal.rs @@ -62,11 +62,11 @@ impl DecimalArrayGenerator { // pick num_decimals randomly from the distinct decimal table let indices: UInt32Array = (0..self.num_decimals) .map(|_| { - if self.rng.gen::() < self.null_pct { + if self.rng.random::() < self.null_pct { None } else if self.num_distinct_decimals > 1 { let range = 1..(self.num_distinct_decimals as u32); - Some(self.rng.gen_range(range)) + Some(self.rng.random_range(range)) } else { Some(0) } diff --git a/test-utils/src/array_gen/primitive.rs b/test-utils/src/array_gen/primitive.rs index 58d39c14e65d6..d6a08f6263b53 100644 --- a/test-utils/src/array_gen/primitive.rs +++ b/test-utils/src/array_gen/primitive.rs @@ -18,7 +18,8 @@ use arrow::array::{ArrayRef, ArrowPrimitiveType, PrimitiveArray, UInt32Array}; use arrow::datatypes::DataType; use chrono_tz::{Tz, TZ_VARIANTS}; -use rand::{rngs::StdRng, seq::SliceRandom, thread_rng, Rng}; +use rand::seq::IndexedRandom; +use rand::{rng, rngs::StdRng, Rng}; use std::sync::Arc; use super::random_data::RandomNativeData; @@ -81,11 +82,11 @@ impl PrimitiveArrayGenerator { // pick num_primitives randomly from the distinct string table let indices: UInt32Array = (0..self.num_primitives) .map(|_| { - if 
self.rng.gen::() < self.null_pct { + if self.rng.random::() < self.null_pct { None } else if self.num_distinct_primitives > 1 { let range = 1..(self.num_distinct_primitives as u32); - Some(self.rng.gen_range(range)) + Some(self.rng.random_range(range)) } else { Some(0) } @@ -102,7 +103,7 @@ impl PrimitiveArrayGenerator { /// - `Some(Arc)` containing the timezone name. /// - `None` if no timezone is selected. fn generate_timezone() -> Option> { - let mut rng = thread_rng(); + let mut rng = rng(); // Allows for timezones + None let mut timezone_options: Vec> = vec![None]; diff --git a/test-utils/src/array_gen/random_data.rs b/test-utils/src/array_gen/random_data.rs index a7297d45fdf07..1c9a9bd7e95aa 100644 --- a/test-utils/src/array_gen/random_data.rs +++ b/test-utils/src/array_gen/random_data.rs @@ -25,7 +25,7 @@ use arrow::datatypes::{ TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; -use rand::distributions::Standard; +use rand::distr::StandardUniform; use rand::prelude::Distribution; use rand::rngs::StdRng; use rand::Rng; @@ -40,11 +40,11 @@ macro_rules! 
basic_random_data { ($ARROW_TYPE: ty) => { impl RandomNativeData for $ARROW_TYPE where - Standard: Distribution, + StandardUniform: Distribution, { #[inline] fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { - rng.gen::() + rng.random::() } } }; @@ -75,7 +75,7 @@ basic_random_data!(TimestampNanosecondType); impl RandomNativeData for Date64Type { fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { // TODO: constrain this range to valid dates if necessary - let date_value = rng.gen_range(i64::MIN..=i64::MAX); + let date_value = rng.random_range(i64::MIN..=i64::MAX); let millis_per_day = 86_400_000; date_value - (date_value % millis_per_day) } @@ -84,8 +84,8 @@ impl RandomNativeData for Date64Type { impl RandomNativeData for IntervalDayTimeType { fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { IntervalDayTime { - days: rng.gen::(), - milliseconds: rng.gen::(), + days: rng.random::(), + milliseconds: rng.random::(), } } } @@ -93,15 +93,15 @@ impl RandomNativeData for IntervalDayTimeType { impl RandomNativeData for IntervalMonthDayNanoType { fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { IntervalMonthDayNano { - months: rng.gen::(), - days: rng.gen::(), - nanoseconds: rng.gen::(), + months: rng.random::(), + days: rng.random::(), + nanoseconds: rng.random::(), } } } impl RandomNativeData for Decimal256Type { fn generate_random_native_data(rng: &mut StdRng) -> Self::Native { - i256::from_parts(rng.gen::(), rng.gen::()) + i256::from_parts(rng.random::(), rng.random::()) } } diff --git a/test-utils/src/array_gen/string.rs b/test-utils/src/array_gen/string.rs index a405cb76b1bd2..9e066fc972612 100644 --- a/test-utils/src/array_gen/string.rs +++ b/test-utils/src/array_gen/string.rs @@ -47,11 +47,11 @@ impl StringArrayGenerator { // pick num_strings randomly from the distinct string table let indices: UInt32Array = (0..self.num_strings) .map(|_| { - if self.rng.gen::() < self.null_pct { + if 
self.rng.random::() < self.null_pct { None } else if self.num_distinct_strings > 1 { let range = 1..(self.num_distinct_strings as u32); - Some(self.rng.gen_range(range)) + Some(self.rng.random_range(range)) } else { Some(0) } @@ -71,11 +71,11 @@ impl StringArrayGenerator { // pick num_strings randomly from the distinct string table let indices: UInt32Array = (0..self.num_strings) .map(|_| { - if self.rng.gen::() < self.null_pct { + if self.rng.random::() < self.null_pct { None } else if self.num_distinct_strings > 1 { let range = 1..(self.num_distinct_strings as u32); - Some(self.rng.gen_range(range)) + Some(self.rng.random_range(range)) } else { Some(0) } @@ -92,10 +92,10 @@ fn random_string(rng: &mut StdRng, max_len: usize) -> String { // pick characters at random (not just ascii) match max_len { 0 => "".to_string(), - 1 => String::from(rng.gen::()), + 1 => String::from(rng.random::()), _ => { - let len = rng.gen_range(1..=max_len); - rng.sample_iter::(rand::distributions::Standard) + let len = rng.random_range(1..=max_len); + rng.sample_iter::(rand::distr::StandardUniform) .take(len) .map(char::from) .collect::() diff --git a/test-utils/src/data_gen.rs b/test-utils/src/data_gen.rs index 7ac6f3d3e255a..2228010b28dd1 100644 --- a/test-utils/src/data_gen.rs +++ b/test-utils/src/data_gen.rs @@ -104,10 +104,11 @@ impl BatchBuilder { } fn append(&mut self, rng: &mut StdRng, host: &str, service: &str) { - let num_pods = rng.gen_range(self.options.pods_per_host.clone()); + let num_pods = rng.random_range(self.options.pods_per_host.clone()); let pods = generate_sorted_strings(rng, num_pods, 30..40); for pod in pods { - let num_containers = rng.gen_range(self.options.containers_per_pod.clone()); + let num_containers = + rng.random_range(self.options.containers_per_pod.clone()); for container_idx in 0..num_containers { let container = format!("{service}_container_{container_idx}"); let image = format!( @@ -115,7 +116,7 @@ impl BatchBuilder { ); let num_entries = - 
rng.gen_range(self.options.entries_per_container.clone()); + rng.random_range(self.options.entries_per_container.clone()); for i in 0..num_entries { if self.is_finished() { return; @@ -154,7 +155,7 @@ impl BatchBuilder { if self.options.include_nulls { // Append a null value if the option is set // Use both "NULL" as a string and a null value - if rng.gen_bool(0.5) { + if rng.random_bool(0.5) { self.client_addr.append_null(); } else { self.client_addr.append_value("NULL"); @@ -162,26 +163,26 @@ impl BatchBuilder { } else { self.client_addr.append_value(format!( "{}.{}.{}.{}", - rng.gen::(), - rng.gen::(), - rng.gen::(), - rng.gen::() + rng.random::(), + rng.random::(), + rng.random::(), + rng.random::() )); } - self.request_duration.append_value(rng.gen()); + self.request_duration.append_value(rng.random()); self.request_user_agent .append_value(random_string(rng, 20..100)); self.request_method - .append_value(methods[rng.gen_range(0..methods.len())]); + .append_value(methods[rng.random_range(0..methods.len())]); self.request_host .append_value(format!("https://{service}.mydomain.com")); self.request_bytes - .append_option(rng.gen_bool(0.9).then(|| rng.gen())); + .append_option(rng.random_bool(0.9).then(|| rng.random())); self.response_bytes - .append_option(rng.gen_bool(0.9).then(|| rng.gen())); + .append_option(rng.random_bool(0.9).then(|| rng.random())); self.response_status - .append_value(status[rng.gen_range(0..status.len())]); + .append_value(status[rng.random_range(0..status.len())]); self.prices_status.append_value(self.row_count as i128); } @@ -216,9 +217,9 @@ impl BatchBuilder { } fn random_string(rng: &mut StdRng, len_range: Range) -> String { - let len = rng.gen_range(len_range); + let len = rng.random_range(len_range); (0..len) - .map(|_| rng.gen_range(b'a'..=b'z') as char) + .map(|_| rng.random_range(b'a'..=b'z') as char) .collect::() } @@ -364,7 +365,7 @@ impl Iterator for AccessLogGenerator { self.host_idx += 1; for service in &["frontend", 
"backend", "database", "cache"] { - if self.rng.gen_bool(0.5) { + if self.rng.random_bool(0.5) { continue; } if builder.is_finished() { diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs index 9db8920833ae5..aa88b08fdd984 100644 --- a/test-utils/src/lib.rs +++ b/test-utils/src/lib.rs @@ -68,9 +68,9 @@ pub fn add_empty_batches( // insert 0, or 1 empty batches before and after the current batch let empty_batch = RecordBatch::new_empty(schema.clone()); std::iter::repeat(empty_batch.clone()) - .take(rng.gen_range(0..2)) + .take(rng.random_range(0..2)) .chain(std::iter::once(batch)) - .chain(std::iter::repeat(empty_batch).take(rng.gen_range(0..2))) + .chain(std::iter::repeat(empty_batch).take(rng.random_range(0..2))) }) .collect() } @@ -101,7 +101,7 @@ pub fn stagger_batch_with_seed(batch: RecordBatch, seed: u64) -> Vec 0 { - let batch_size = rng.gen_range(0..remainder.num_rows() + 1); + let batch_size = rng.random_range(0..remainder.num_rows() + 1); batches.push(remainder.slice(0, batch_size)); remainder = remainder.slice(batch_size, remainder.num_rows() - batch_size); diff --git a/test-utils/src/string_gen.rs b/test-utils/src/string_gen.rs index b598241db1e92..d4a378bca739d 100644 --- a/test-utils/src/string_gen.rs +++ b/test-utils/src/string_gen.rs @@ -19,7 +19,7 @@ use crate::array_gen::StringArrayGenerator; use crate::stagger_batch; use arrow::record_batch::RecordBatch; use rand::rngs::StdRng; -use rand::{thread_rng, Rng, SeedableRng}; +use rand::{rng, Rng, SeedableRng}; /// Randomly generate strings pub struct StringBatchGenerator(StringArrayGenerator); @@ -60,14 +60,14 @@ impl StringBatchGenerator { /// cases pub fn interesting_cases() -> Vec { let mut cases = vec![]; - let mut rng = thread_rng(); + let mut rng = rng(); for null_pct in [0.0, 0.01, 0.1, 0.5] { for _ in 0..10 { // max length of generated strings - let max_len = rng.gen_range(1..50); - let num_strings = rng.gen_range(1..100); + let max_len = rng.random_range(1..50); + let num_strings = 
rng.random_range(1..100); let num_distinct_strings = if num_strings > 1 { - rng.gen_range(1..num_strings) + rng.random_range(1..num_strings) } else { num_strings }; @@ -76,7 +76,7 @@ impl StringBatchGenerator { num_strings, num_distinct_strings, null_pct, - rng: StdRng::from_seed(rng.gen()), + rng: StdRng::from_seed(rng.random()), })) } } From 8d6d98cdcecc6f1d26874aa50c3ca8604281b986 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 3 Feb 2025 16:40:05 +0100 Subject: [PATCH 4/7] Rustfmt --- datafusion/functions-aggregate/benches/array_agg.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-aggregate/benches/array_agg.rs b/datafusion/functions-aggregate/benches/array_agg.rs index 6dc0c5ab2a39c..b9e9f121aaf44 100644 --- a/datafusion/functions-aggregate/benches/array_agg.rs +++ b/datafusion/functions-aggregate/benches/array_agg.rs @@ -17,7 +17,9 @@ use std::sync::Arc; -use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, PrimitiveArray}; +use arrow::array::{ + Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, PrimitiveArray, +}; use arrow::datatypes::Int64Type; use arrow_schema::Field; use criterion::{black_box, criterion_group, criterion_main, Criterion}; From d817b4b39610a1429573117b851fa872d983947a Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 3 Feb 2025 17:04:16 +0100 Subject: [PATCH 5/7] Update cli `Cargo.lock` --- datafusion-cli/Cargo.lock | 85 +++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e613e7eb922f1..369be49037c96 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -34,7 +34,7 @@ dependencies = [ "getrandom 0.2.15", "once_cell", "version_check", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -146,7 +146,7 @@ dependencies = [ "log", "num-bigint", "quad-rand", - "rand", + "rand 0.8.5", "regex-lite", "serde", "serde_bytes", @@ 
-887,9 +887,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "bytes-utils" @@ -1246,7 +1246,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.0", "regex", "sqlparser", "tempfile", @@ -1359,7 +1359,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.9.0", "tempfile", "url", ] @@ -1414,7 +1414,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand", + "rand 0.9.0", "regex", "sha2", "unicode-segmentation", @@ -2830,7 +2830,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml", - "rand", + "rand 0.8.5", "reqwest", "ring", "rustls-pemfile 2.2.0", @@ -2993,7 +2993,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -3035,7 +3035,7 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -3137,7 +3137,7 @@ checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", "getrandom 0.2.15", - "rand", + "rand 0.8.5", "ring", "rustc-hash", "rustls 0.23.22", @@ -3189,8 +3189,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.0", + "zerocopy 0.8.14", ] [[package]] @@ -3200,7 +3211,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.0", ] [[package]] @@ -3212,6 +3233,16 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "rand_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" +dependencies = [ + "getrandom 0.3.1", + "zerocopy 0.8.14", +] + [[package]] name = "recursive" version = "0.1.1" @@ -4292,9 +4323,9 @@ checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" [[package]] name = "wait-timeout" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" dependencies = [ "libc", ] @@ -4643,7 +4674,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468" 
+dependencies = [ + "zerocopy-derive 0.8.14", ] [[package]] @@ -4657,6 +4697,17 @@ dependencies = [ "syn", ] +[[package]] +name = "zerocopy-derive" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.5" From 2a241258c8b33850d3cafa08fd9ecefc5d451ccf Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 3 Feb 2025 17:24:06 +0100 Subject: [PATCH 6/7] Bump `getrandom` to `0.3.1` --- datafusion/wasmtest/.cargo/config.toml | 2 ++ datafusion/wasmtest/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 datafusion/wasmtest/.cargo/config.toml diff --git a/datafusion/wasmtest/.cargo/config.toml b/datafusion/wasmtest/.cargo/config.toml new file mode 100644 index 0000000000000..2e07606d52fb3 --- /dev/null +++ b/datafusion/wasmtest/.cargo/config.toml @@ -0,0 +1,2 @@ +[target.wasm32-unknown-unknown] +rustflags = ['--cfg', 'getrandom_backend="wasm_js"'] diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index aae66e6b9a972..47fe4248e5538 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -58,8 +58,8 @@ datafusion-physical-expr = { workspace = true, default-features = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } -# getrandom must be compiled with js feature -getrandom = { version = "0.2.8", features = ["js"] } +# https://docs.rs/getrandom/latest/getrandom/#webassembly-support +getrandom = { version = "0.3.1", features = ["wasm_js"] } parquet = { workspace = true } wasm-bindgen = "0.2.99" From ba28998dba5ceff89c127db249d642f50952e215 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 4 Feb 2025 11:25:48 +0100 Subject: [PATCH 7/7] Use `rand_chacha` for test 
portability --- datafusion/core/Cargo.toml | 1 + datafusion/core/tests/dataframe/mod.rs | 118 +++++++++++++------------ 2 files changed, 64 insertions(+), 55 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 89528425b38ad..a3192a1534814 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -146,6 +146,7 @@ doc-comment = { workspace = true } env_logger = { workspace = true } paste = "^1.0" rand = { workspace = true, features = ["small_rng", "thread_rng"] } +rand_chacha = "0.9.0" rand_distr = "0.5.0" regex = { workspace = true } rstest = { workspace = true } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 72456a26b6148..a58e3f5bd81ca 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -3262,31 +3262,34 @@ async fn unnest_columns() -> Result<()> { const NUM_ROWS: usize = 4; let df = table_with_nested_types(NUM_ROWS).await?; let results = df.collect().await?; - let expected = ["+----------+------------------------------------------------+--------------------+", - "| shape_id | points | tags |", - "+----------+------------------------------------------------+--------------------+", - "| 1 | [{x: -3, y: -4}, {x: -3, y: 6}, {x: 2, y: -2}] | [tag1] |", - "| 2 | | [tag1, tag2] |", - "| 3 | [{x: -9, y: 2}, {x: -10, y: -4}] | |", - "| 4 | [{x: -3, y: 5}, {x: 2, y: -1}] | [tag1, tag2, tag3] |", - "+----------+------------------------------------------------+--------------------+"]; + let expected = [ + "+----------+--------------------------------------------------+--------------------------+", + "| shape_id | points | tags |", + "+----------+--------------------------------------------------+--------------------------+", + "| 1 | [{x: -4, y: 2}, {x: -8, y: 3}, {x: 6, y: -2}] | |", + "| 2 | [{x: 4, y: 3}, {x: -6, y: -10}, {x: -10, y: 9}] | [tag1, tag2] |", + "| 3 | | [tag1, tag2, tag3, tag4] |", + "| 4 | [{x: -2, y: -3}, 
{x: -5, y: -3}, {x: -10, y: 0}] | |", + "+----------+--------------------------------------------------+--------------------------+", + ]; assert_batches_sorted_eq!(expected, &results); // Unnest tags let df = table_with_nested_types(NUM_ROWS).await?; let results = df.unnest_columns(&["tags"])?.collect().await?; let expected = [ - "+----------+------------------------------------------------+------+", - "| shape_id | points | tags |", - "+----------+------------------------------------------------+------+", - "| 1 | [{x: -3, y: -4}, {x: -3, y: 6}, {x: 2, y: -2}] | tag1 |", - "| 2 | | tag1 |", - "| 2 | | tag2 |", - "| 3 | [{x: -9, y: 2}, {x: -10, y: -4}] | |", - "| 4 | [{x: -3, y: 5}, {x: 2, y: -1}] | tag1 |", - "| 4 | [{x: -3, y: 5}, {x: 2, y: -1}] | tag2 |", - "| 4 | [{x: -3, y: 5}, {x: 2, y: -1}] | tag3 |", - "+----------+------------------------------------------------+------+", + "+----------+--------------------------------------------------+------+", + "| shape_id | points | tags |", + "+----------+--------------------------------------------------+------+", + "| 1 | [{x: -4, y: 2}, {x: -8, y: 3}, {x: 6, y: -2}] | |", + "| 2 | [{x: 4, y: 3}, {x: -6, y: -10}, {x: -10, y: 9}] | tag1 |", + "| 2 | [{x: 4, y: 3}, {x: -6, y: -10}, {x: -10, y: 9}] | tag2 |", + "| 3 | | tag1 |", + "| 3 | | tag2 |", + "| 3 | | tag3 |", + "| 3 | | tag4 |", + "| 4 | [{x: -2, y: -3}, {x: -5, y: -3}, {x: -10, y: 0}] | |", + "+----------+--------------------------------------------------+------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -3299,18 +3302,20 @@ async fn unnest_columns() -> Result<()> { let df = table_with_nested_types(NUM_ROWS).await?; let results = df.unnest_columns(&["points"])?.collect().await?; let expected = [ - "+----------+-----------------+--------------------+", - "| shape_id | points | tags |", - "+----------+-----------------+--------------------+", - "| 1 | {x: -3, y: -4} | [tag1] |", - "| 1 | {x: -3, y: 6} | [tag1] |", - "| 1 | {x: 2, y: -2} | [tag1] 
|", - "| 2 | | [tag1, tag2] |", - "| 3 | {x: -10, y: -4} | |", - "| 3 | {x: -9, y: 2} | |", - "| 4 | {x: -3, y: 5} | [tag1, tag2, tag3] |", - "| 4 | {x: 2, y: -1} | [tag1, tag2, tag3] |", - "+----------+-----------------+--------------------+", + "+----------+-----------------+--------------------------+", + "| shape_id | points | tags |", + "+----------+-----------------+--------------------------+", + "| 1 | {x: -4, y: 2} | |", + "| 1 | {x: -8, y: 3} | |", + "| 1 | {x: 6, y: -2} | |", + "| 2 | {x: -10, y: 9} | [tag1, tag2] |", + "| 2 | {x: -6, y: -10} | [tag1, tag2] |", + "| 2 | {x: 4, y: 3} | [tag1, tag2] |", + "| 3 | | [tag1, tag2, tag3, tag4] |", + "| 4 | {x: -10, y: 0} | |", + "| 4 | {x: -2, y: -3} | |", + "| 4 | {x: -5, y: -3} | |", + "+----------+-----------------+--------------------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -3330,19 +3335,22 @@ async fn unnest_columns() -> Result<()> { "+----------+-----------------+------+", "| shape_id | points | tags |", "+----------+-----------------+------+", - "| 1 | {x: -3, y: -4} | tag1 |", - "| 1 | {x: -3, y: 6} | tag1 |", - "| 1 | {x: 2, y: -2} | tag1 |", - "| 2 | | tag1 |", - "| 2 | | tag2 |", - "| 3 | {x: -10, y: -4} | |", - "| 3 | {x: -9, y: 2} | |", - "| 4 | {x: -3, y: 5} | tag1 |", - "| 4 | {x: -3, y: 5} | tag2 |", - "| 4 | {x: -3, y: 5} | tag3 |", - "| 4 | {x: 2, y: -1} | tag1 |", - "| 4 | {x: 2, y: -1} | tag2 |", - "| 4 | {x: 2, y: -1} | tag3 |", + "| 1 | {x: -4, y: 2} | |", + "| 1 | {x: -8, y: 3} | |", + "| 1 | {x: 6, y: -2} | |", + "| 2 | {x: -10, y: 9} | tag1 |", + "| 2 | {x: -10, y: 9} | tag2 |", + "| 2 | {x: -6, y: -10} | tag1 |", + "| 2 | {x: -6, y: -10} | tag2 |", + "| 2 | {x: 4, y: 3} | tag1 |", + "| 2 | {x: 4, y: 3} | tag2 |", + "| 3 | | tag1 |", + "| 3 | | tag2 |", + "| 3 | | tag3 |", + "| 3 | | tag4 |", + "| 4 | {x: -10, y: 0} | |", + "| 4 | {x: -2, y: -3} | |", + "| 4 | {x: -5, y: -3} | |", "+----------+-----------------+------+", ]; assert_batches_sorted_eq!(expected, 
&results); @@ -3655,15 +3663,15 @@ async fn unnest_aggregate_columns() -> Result<()> { let df = table_with_nested_types(NUM_ROWS).await?; let results = df.select_columns(&["tags"])?.collect().await?; let expected = [ - r#"+--------------------+"#, - r#"| tags |"#, - r#"+--------------------+"#, - r#"| [tag1] |"#, - r#"| [tag1, tag2] |"#, - r#"| |"#, - r#"| [tag1, tag2, tag3] |"#, - r#"| [tag1, tag2, tag3] |"#, - r#"+--------------------+"#, + r#"+--------------------------+"#, + r#"| tags |"#, + r#"+--------------------------+"#, + r#"| |"#, + r#"| |"#, + r#"| [tag1, tag2, tag3, tag4] |"#, + r#"| [tag1, tag2] |"#, + r#"| [tag1, tag2] |"#, + r#"+--------------------------+"#, ]; assert_batches_sorted_eq!(expected, &results); @@ -3677,7 +3685,7 @@ async fn unnest_aggregate_columns() -> Result<()> { r#"+-------------+"#, r#"| count(tags) |"#, r#"+-------------+"#, - r#"| 9 |"#, + r#"| 8 |"#, r#"+-------------+"#, ]; assert_batches_sorted_eq!(expected, &results); @@ -4242,7 +4250,7 @@ async fn table_with_nested_types(n: usize) -> Result { )); let mut tags_builder = ListBuilder::new(StringBuilder::new()); - let mut rng = StdRng::seed_from_u64(197); + let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(197); for idx in 0..n { // Append shape id.