From 4e14f596641107a2b70c12a9f0315476310becef Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Jul 2023 07:57:28 -0400 Subject: [PATCH 1/3] Test: Use unchecked access in group hash --- Cargo.toml | 12 +++++++++++- .../core/src/physical_plan/aggregates/row_hash.rs | 5 +++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2db0379b06579..66da8c4553d61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,4 +56,14 @@ lto = false opt-level = 3 overflow-checks = false panic = 'unwind' -rpath = false \ No newline at end of file +rpath = false + + +[patch.crates-io] +arrow = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } +arrow-array = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } +arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } +arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } +arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } +arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } +parquet = { git = "https://github.com/alamb/arrow-rs.git",branch="alamb/peephole" } diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs index c57f436324079..06e1fcffd44a9 100644 --- a/datafusion/core/src/physical_plan/aggregates/row_hash.rs +++ b/datafusion/core/src/physical_plan/aggregates/row_hash.rs @@ -423,7 +423,8 @@ impl GroupedHashAggregateStream { // verify that a group that we are inserting with hash is // actually the same key value as the group in // existing_idx (aka group_values @ row) - group_rows.row(row) == self.group_values.row(*group_idx) + group_rows.row_unchecked(row) + == self.group_values.row_unchecked(*group_idx) }); let group_idx = match entry { @@ -433,7 +434,7 @@ impl GroupedHashAggregateStream { None => { // Add new entry to aggr_state and save newly created index let group_idx = self.group_values.num_rows(); - self.group_values.push(group_rows.row(row)); + self.group_values.push(group_rows.row_unchecked(row)); // for hasher function, use precomputed hash value self.map.insert_accounted( From 7043c6d2e606f2037baa951d8bb138f67dc05f38 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Jul 2023 08:00:33 -0400 Subject: [PATCH 2/3] unsafe --- .../core/src/physical_plan/aggregates/row_hash.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs index 06e1fcffd44a9..08e21053b92d4 100644 --- a/datafusion/core/src/physical_plan/aggregates/row_hash.rs +++ b/datafusion/core/src/physical_plan/aggregates/row_hash.rs @@ -423,8 +423,10 @@ impl GroupedHashAggregateStream { // verify that a group that we are inserting with hash is // actually the same key value as the group in // existing_idx (aka group_values @ row) - group_rows.row_unchecked(row) - == self.group_values.row_unchecked(*group_idx) + unsafe { + group_rows.row_unchecked(row) + == self.group_values.row_unchecked(*group_idx) + } }); let group_idx = match entry { @@ -434,7 +436,9 @@ impl GroupedHashAggregateStream { None => { // Add new entry to aggr_state and save newly created index let group_idx = self.group_values.num_rows(); - self.group_values.push(group_rows.row_unchecked(row)); + unsafe { + self.group_values.push(group_rows.row_unchecked(row)); + } // for hasher function, use precomputed hash value self.map.insert_accounted( From 3c665ccf420ffa957599dc9667639fe1a95ce8a8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Jul 2023 08:12:15 -0400 Subject: [PATCH 3/3] pin version --- Cargo.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 66da8c4553d61..d69bec7ae8bc2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,10 +60,10 @@ rpath = false [patch.crates-io] -arrow = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } -arrow-array = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } -arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } -arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } -arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } -arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", branch="alamb/peephole" } -parquet = { git = "https://github.com/alamb/arrow-rs.git",branch="alamb/peephole" } +arrow = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" } +arrow-array = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" } +arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" } +arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" } +arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" } +arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" } +parquet = { git = "https://github.com/alamb/arrow-rs.git", rev="661caf3" }